1 /* csv module */
2
3 /*
4
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
7 module instead.
8
9 */
10
11 #define MODULE_VERSION "1.0"
12
13 #include "Python.h"
14 #include "structmember.h" // PyMemberDef
15 #include <stdbool.h>
16
17 /*[clinic input]
18 module _csv
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=385118b71aa43706]*/
21
22 #include "clinic/_csv.c.h"
/* Sentinel stored in a dialect's Py_UCS4 slots when no character is
 * configured; get_char_or_None() maps it to None. */
#define NOT_SET ((Py_UCS4)-1)
/* Pseudo-character fed to parse_process_char() by Reader_iternext() after
 * the last real character of every input line. */
#define EOL ((Py_UCS4)-2)
25
26
/* Per-module state; obtained through PyModule_GetState(). */
typedef struct {
    PyObject *error_obj;   /* CSV exception */
    PyObject *dialects;    /* Dialect registry */
    PyTypeObject *dialect_type;  /* type called by _call_dialect() */
    PyTypeObject *reader_type;   /* type instantiated by csv_reader() */
    PyTypeObject *writer_type;
    long field_limit;          /* max parsed field size */
    PyObject *str_write;   /* NOTE(review): presumably a cached "write"
                              name string - confirm against module init */
} _csvstate;
36
/* Forward declaration: _csv_state_from_type() needs the module definition
 * to look the module up from a type via PyType_GetModuleByDef(). */
static struct PyModuleDef _csvmodule;
38
39 static inline _csvstate*
get_csv_state(PyObject * module)40 get_csv_state(PyObject *module)
41 {
42 void *state = PyModule_GetState(module);
43 assert(state != NULL);
44 return (_csvstate *)state;
45 }
46
47 static int
_csv_clear(PyObject * module)48 _csv_clear(PyObject *module)
49 {
50 _csvstate *module_state = PyModule_GetState(module);
51 Py_CLEAR(module_state->error_obj);
52 Py_CLEAR(module_state->dialects);
53 Py_CLEAR(module_state->dialect_type);
54 Py_CLEAR(module_state->reader_type);
55 Py_CLEAR(module_state->writer_type);
56 Py_CLEAR(module_state->str_write);
57 return 0;
58 }
59
60 static int
_csv_traverse(PyObject * module,visitproc visit,void * arg)61 _csv_traverse(PyObject *module, visitproc visit, void *arg)
62 {
63 _csvstate *module_state = PyModule_GetState(module);
64 Py_VISIT(module_state->error_obj);
65 Py_VISIT(module_state->dialects);
66 Py_VISIT(module_state->dialect_type);
67 Py_VISIT(module_state->reader_type);
68 Py_VISIT(module_state->writer_type);
69 return 0;
70 }
71
72 static void
_csv_free(void * module)73 _csv_free(void *module)
74 {
75 _csv_clear((PyObject *)module);
76 }
77
/* States of the reader's character-level state machine
 * (see parse_process_char()):
 *   START_RECORD            - nothing of the current record consumed yet
 *   START_FIELD             - expecting the first character of a field
 *   ESCAPED_CHAR            - escapechar seen outside a quoted field
 *   IN_FIELD                - inside an unquoted field
 *   IN_QUOTED_FIELD         - inside a quoted field
 *   ESCAPE_IN_QUOTED_FIELD  - escapechar seen inside a quoted field
 *   QUOTE_IN_QUOTED_FIELD   - quotechar seen inside a quoted field while
 *                             doublequote is enabled
 *   EAT_CRNL                - record finished, consuming line-end characters
 *   AFTER_ESCAPED_CRNL      - an escaped '\r' or '\n' was just stored
 */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,AFTER_ESCAPED_CRNL
} ParserState;
83
/* Quoting modes a dialect may select; the value is stored in
 * DialectObj.quoting and validated by dialect_check_quoting(). */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;
87
/* Associates a QuoteStyle value with its symbolic name. */
typedef struct {
    QuoteStyle style;   /* enum value */
    const char *name;   /* symbolic name; NULL marks the end of a table */
} StyleDesc;
92
/* Table of all valid quoting styles.  The trailing zeroed entry has a NULL
 * name, which is what dialect_check_quoting() uses to stop iterating. */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
100
/* Instance layout of the Dialect type.  Character slots hold NOT_SET when
 * no character is configured. */
typedef struct {
    PyObject_HEAD

    char doublequote;           /* is " represented by ""? */
    char skipinitialspace;      /* ignore spaces following delimiter? */
    char strict;                /* raise exception on bad CSV */
    int quoting;                /* style of quoting to write */
    Py_UCS4 delimiter;          /* field separator */
    Py_UCS4 quotechar;          /* quote character */
    Py_UCS4 escapechar;         /* escape character */
    PyObject *lineterminator;   /* string to write between records */

} DialectObj;
114
/* Instance layout of the reader type: the input iterator, the dialect and
 * the parser's working state for the record currently being assembled. */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;       /* field list for current record */
    ParserState state;      /* current CSV parse state */
    Py_UCS4 *field;         /* temporary buffer */
    Py_ssize_t field_size;  /* size of allocated buffer */
    Py_ssize_t field_len;   /* length of current field */
    int numeric_field;      /* treat field as numeric */
    unsigned long line_num; /* Source-file line number */
} ReaderObj;
130
/* Instance layout of the writer type: the output callable, the dialect and
 * the buffer in which the current record is assembled. */
typedef struct {
    PyObject_HEAD

    PyObject *write;    /* write output lines to this file */

    DialectObj *dialect;    /* dialect governing the output format */

    Py_UCS4 *rec;            /* buffer for parser.join */
    Py_ssize_t rec_size;        /* size of allocated record */
    Py_ssize_t rec_len;         /* length of record */
    int num_fields;             /* number of fields in record */

    PyObject *error_obj;       /* cached error object */
} WriterObj;
145
146 /*
147 * DIALECT class
148 */
149
150 static PyObject *
get_dialect_from_registry(PyObject * name_obj,_csvstate * module_state)151 get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
152 {
153 PyObject *dialect_obj;
154
155 dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
156 if (dialect_obj == NULL) {
157 if (!PyErr_Occurred())
158 PyErr_Format(module_state->error_obj, "unknown dialect");
159 }
160 else
161 Py_INCREF(dialect_obj);
162
163 return dialect_obj;
164 }
165
166 static PyObject *
get_char_or_None(Py_UCS4 c)167 get_char_or_None(Py_UCS4 c)
168 {
169 if (c == NOT_SET) {
170 Py_RETURN_NONE;
171 }
172 else
173 return PyUnicode_FromOrdinal(c);
174 }
175
176 static PyObject *
Dialect_get_lineterminator(DialectObj * self,void * Py_UNUSED (ignored))177 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
178 {
179 Py_XINCREF(self->lineterminator);
180 return self->lineterminator;
181 }
182
183 static PyObject *
Dialect_get_delimiter(DialectObj * self,void * Py_UNUSED (ignored))184 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
185 {
186 return get_char_or_None(self->delimiter);
187 }
188
189 static PyObject *
Dialect_get_escapechar(DialectObj * self,void * Py_UNUSED (ignored))190 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
191 {
192 return get_char_or_None(self->escapechar);
193 }
194
195 static PyObject *
Dialect_get_quotechar(DialectObj * self,void * Py_UNUSED (ignored))196 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
197 {
198 return get_char_or_None(self->quotechar);
199 }
200
201 static PyObject *
Dialect_get_quoting(DialectObj * self,void * Py_UNUSED (ignored))202 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
203 {
204 return PyLong_FromLong(self->quoting);
205 }
206
207 static int
_set_bool(const char * name,char * target,PyObject * src,bool dflt)208 _set_bool(const char *name, char *target, PyObject *src, bool dflt)
209 {
210 if (src == NULL)
211 *target = dflt;
212 else {
213 int b = PyObject_IsTrue(src);
214 if (b < 0)
215 return -1;
216 *target = (char)b;
217 }
218 return 0;
219 }
220
221 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)222 _set_int(const char *name, int *target, PyObject *src, int dflt)
223 {
224 if (src == NULL)
225 *target = dflt;
226 else {
227 int value;
228 if (!PyLong_CheckExact(src)) {
229 PyErr_Format(PyExc_TypeError,
230 "\"%s\" must be an integer", name);
231 return -1;
232 }
233 value = _PyLong_AsInt(src);
234 if (value == -1 && PyErr_Occurred()) {
235 return -1;
236 }
237 *target = value;
238 }
239 return 0;
240 }
241
242 static int
_set_char_or_none(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)243 _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
244 {
245 if (src == NULL) {
246 *target = dflt;
247 }
248 else {
249 *target = NOT_SET;
250 if (src != Py_None) {
251 if (!PyUnicode_Check(src)) {
252 PyErr_Format(PyExc_TypeError,
253 "\"%s\" must be string or None, not %.200s", name,
254 Py_TYPE(src)->tp_name);
255 return -1;
256 }
257 Py_ssize_t len = PyUnicode_GetLength(src);
258 if (len < 0) {
259 return -1;
260 }
261 if (len != 1) {
262 PyErr_Format(PyExc_TypeError,
263 "\"%s\" must be a 1-character string",
264 name);
265 return -1;
266 }
267 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
268 *target = PyUnicode_READ_CHAR(src, 0);
269 }
270 }
271 return 0;
272 }
273
274 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)275 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
276 {
277 if (src == NULL) {
278 *target = dflt;
279 }
280 else {
281 if (!PyUnicode_Check(src)) {
282 PyErr_Format(PyExc_TypeError,
283 "\"%s\" must be string, not %.200s", name,
284 Py_TYPE(src)->tp_name);
285 return -1;
286 }
287 Py_ssize_t len = PyUnicode_GetLength(src);
288 if (len < 0) {
289 return -1;
290 }
291 if (len != 1) {
292 PyErr_Format(PyExc_TypeError,
293 "\"%s\" must be a 1-character string",
294 name);
295 return -1;
296 }
297 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
298 *target = PyUnicode_READ_CHAR(src, 0);
299 }
300 return 0;
301 }
302
303 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)304 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
305 {
306 if (src == NULL)
307 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
308 else {
309 if (src == Py_None)
310 *target = NULL;
311 else if (!PyUnicode_Check(src)) {
312 PyErr_Format(PyExc_TypeError,
313 "\"%s\" must be a string", name);
314 return -1;
315 }
316 else {
317 if (PyUnicode_READY(src) == -1)
318 return -1;
319 Py_INCREF(src);
320 Py_XSETREF(*target, src);
321 }
322 }
323 return 0;
324 }
325
326 static int
dialect_check_quoting(int quoting)327 dialect_check_quoting(int quoting)
328 {
329 const StyleDesc *qs;
330
331 for (qs = quote_styles; qs->name; qs++) {
332 if ((int)qs->style == quoting)
333 return 0;
334 }
335 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
336 return -1;
337 }
338
/* Byte offset of field `x` inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/* Read-only boolean attributes exposed straight from the char flags stored
 * in DialectObj. */
static struct PyMemberDef Dialect_memberlist[] = {
    { "skipinitialspace",   T_BOOL, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_BOOL, D_OFF(doublequote), READONLY },
    { "strict",             T_BOOL, D_OFF(strict), READONLY },
    { NULL }
};
347
/* Computed attributes of Dialect.  Only getters are supplied, so no setter
 * is registered for any of them. */
static PyGetSetDef Dialect_getsetlist[] = {
    { "delimiter",          (getter)Dialect_get_delimiter},
    { "escapechar",             (getter)Dialect_get_escapechar},
    { "lineterminator",         (getter)Dialect_get_lineterminator},
    { "quotechar",              (getter)Dialect_get_quotechar},
    { "quoting",                (getter)Dialect_get_quoting},
    {NULL},
};
356
357 static void
Dialect_dealloc(DialectObj * self)358 Dialect_dealloc(DialectObj *self)
359 {
360 PyTypeObject *tp = Py_TYPE(self);
361 PyObject_GC_UnTrack(self);
362 tp->tp_clear((PyObject *)self);
363 PyObject_GC_Del(self);
364 Py_DECREF(tp);
365 }
366
/* Keyword names accepted by dialect_new(), in the order in which its
 * PyArg_ParseTupleAndKeywords() call lists the output variables. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
379
380 static _csvstate *
_csv_state_from_type(PyTypeObject * type,const char * name)381 _csv_state_from_type(PyTypeObject *type, const char *name)
382 {
383 PyObject *module = PyType_GetModuleByDef(type, &_csvmodule);
384 if (module == NULL) {
385 return NULL;
386 }
387 _csvstate *module_state = PyModule_GetState(module);
388 if (module_state == NULL) {
389 PyErr_Format(PyExc_SystemError,
390 "%s: No _csv module state found", name);
391 return NULL;
392 }
393 return module_state;
394 }
395
396 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)397 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
398 {
399 DialectObj *self;
400 PyObject *ret = NULL;
401 PyObject *dialect = NULL;
402 PyObject *delimiter = NULL;
403 PyObject *doublequote = NULL;
404 PyObject *escapechar = NULL;
405 PyObject *lineterminator = NULL;
406 PyObject *quotechar = NULL;
407 PyObject *quoting = NULL;
408 PyObject *skipinitialspace = NULL;
409 PyObject *strict = NULL;
410
411 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
412 "|OOOOOOOOO", dialect_kws,
413 &dialect,
414 &delimiter,
415 &doublequote,
416 &escapechar,
417 &lineterminator,
418 "echar,
419 "ing,
420 &skipinitialspace,
421 &strict))
422 return NULL;
423
424 _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
425 if (module_state == NULL) {
426 return NULL;
427 }
428
429 if (dialect != NULL) {
430 if (PyUnicode_Check(dialect)) {
431 dialect = get_dialect_from_registry(dialect, module_state);
432 if (dialect == NULL)
433 return NULL;
434 }
435 else
436 Py_INCREF(dialect);
437 /* Can we reuse this instance? */
438 if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
439 delimiter == NULL &&
440 doublequote == NULL &&
441 escapechar == NULL &&
442 lineterminator == NULL &&
443 quotechar == NULL &&
444 quoting == NULL &&
445 skipinitialspace == NULL &&
446 strict == NULL)
447 return dialect;
448 }
449
450 self = (DialectObj *)type->tp_alloc(type, 0);
451 if (self == NULL) {
452 Py_CLEAR(dialect);
453 return NULL;
454 }
455 self->lineterminator = NULL;
456
457 Py_XINCREF(delimiter);
458 Py_XINCREF(doublequote);
459 Py_XINCREF(escapechar);
460 Py_XINCREF(lineterminator);
461 Py_XINCREF(quotechar);
462 Py_XINCREF(quoting);
463 Py_XINCREF(skipinitialspace);
464 Py_XINCREF(strict);
465 if (dialect != NULL) {
466 #define DIALECT_GETATTR(v, n) \
467 do { \
468 if (v == NULL) { \
469 v = PyObject_GetAttrString(dialect, n); \
470 if (v == NULL) \
471 PyErr_Clear(); \
472 } \
473 } while (0)
474 DIALECT_GETATTR(delimiter, "delimiter");
475 DIALECT_GETATTR(doublequote, "doublequote");
476 DIALECT_GETATTR(escapechar, "escapechar");
477 DIALECT_GETATTR(lineterminator, "lineterminator");
478 DIALECT_GETATTR(quotechar, "quotechar");
479 DIALECT_GETATTR(quoting, "quoting");
480 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
481 DIALECT_GETATTR(strict, "strict");
482 }
483
484 /* check types and convert to C values */
485 #define DIASET(meth, name, target, src, dflt) \
486 if (meth(name, target, src, dflt)) \
487 goto err
488 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
489 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
490 DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, NOT_SET);
491 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
492 DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
493 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
494 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
495 DIASET(_set_bool, "strict", &self->strict, strict, false);
496
497 /* validate options */
498 if (dialect_check_quoting(self->quoting))
499 goto err;
500 if (self->delimiter == NOT_SET) {
501 PyErr_SetString(PyExc_TypeError,
502 "\"delimiter\" must be a 1-character string");
503 goto err;
504 }
505 if (quotechar == Py_None && quoting == NULL)
506 self->quoting = QUOTE_NONE;
507 if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) {
508 PyErr_SetString(PyExc_TypeError,
509 "quotechar must be set if quoting enabled");
510 goto err;
511 }
512 if (self->lineterminator == NULL) {
513 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
514 goto err;
515 }
516
517 ret = (PyObject *)self;
518 Py_INCREF(self);
519 err:
520 Py_CLEAR(self);
521 Py_CLEAR(dialect);
522 Py_CLEAR(delimiter);
523 Py_CLEAR(doublequote);
524 Py_CLEAR(escapechar);
525 Py_CLEAR(lineterminator);
526 Py_CLEAR(quotechar);
527 Py_CLEAR(quoting);
528 Py_CLEAR(skipinitialspace);
529 Py_CLEAR(strict);
530 return ret;
531 }
532
533 /* Since dialect is now a heap type, it inherits pickling method for
534 * protocol 0 and 1 from object, therefore it needs to be overridden */
535
536 PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
537
538 static PyObject *
Dialect_reduce(PyObject * self,PyObject * args)539 Dialect_reduce(PyObject *self, PyObject *args) {
540 PyErr_Format(PyExc_TypeError,
541 "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
542 return NULL;
543 }
544
/* Method table of Dialect: both pickling entry points are routed to
 * Dialect_reduce(). */
static struct PyMethodDef dialect_methods[] = {
    {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
    {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
    {NULL, NULL}
};
550
/* Docstring installed as the Dialect type's tp_doc. */
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");
555
556 static int
Dialect_clear(DialectObj * self)557 Dialect_clear(DialectObj *self)
558 {
559 Py_CLEAR(self->lineterminator);
560 return 0;
561 }
562
563 static int
Dialect_traverse(DialectObj * self,visitproc visit,void * arg)564 Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
565 {
566 Py_VISIT(self->lineterminator);
567 Py_VISIT(Py_TYPE(self));
568 return 0;
569 }
570
/* Slot table from which the Dialect heap type is built (see
 * Dialect_Type_spec). */
static PyType_Slot Dialect_Type_slots[] = {
    {Py_tp_doc, (char*)Dialect_Type_doc},
    {Py_tp_members, Dialect_memberlist},
    {Py_tp_getset, Dialect_getsetlist},
    {Py_tp_new, dialect_new},
    {Py_tp_methods, dialect_methods},
    {Py_tp_dealloc, Dialect_dealloc},
    {Py_tp_clear, Dialect_clear},
    {Py_tp_traverse, Dialect_traverse},
    {0, NULL}
};
582
/* Type specification for _csv.Dialect: a GC-enabled, subclassable type
 * flagged as immutable. */
PyType_Spec Dialect_Type_spec = {
    .name = "_csv.Dialect",
    .basicsize = sizeof(DialectObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = Dialect_Type_slots,
};
590
591
592 /*
593 * Return an instance of the dialect type, given a Python instance or kwarg
594 * description of the dialect
595 */
596 static PyObject *
_call_dialect(_csvstate * module_state,PyObject * dialect_inst,PyObject * kwargs)597 _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
598 {
599 PyObject *type = (PyObject *)module_state->dialect_type;
600 if (dialect_inst) {
601 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
602 }
603 else {
604 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
605 }
606 }
607
608 /*
609 * READER
610 */
611 static int
parse_save_field(ReaderObj * self)612 parse_save_field(ReaderObj *self)
613 {
614 PyObject *field;
615
616 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
617 (void *) self->field, self->field_len);
618 if (field == NULL)
619 return -1;
620 self->field_len = 0;
621 if (self->numeric_field) {
622 PyObject *tmp;
623
624 self->numeric_field = 0;
625 tmp = PyNumber_Float(field);
626 Py_DECREF(field);
627 if (tmp == NULL)
628 return -1;
629 field = tmp;
630 }
631 if (PyList_Append(self->fields, field) < 0) {
632 Py_DECREF(field);
633 return -1;
634 }
635 Py_DECREF(field);
636 return 0;
637 }
638
639 static int
parse_grow_buff(ReaderObj * self)640 parse_grow_buff(ReaderObj *self)
641 {
642 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
643
644 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
645 Py_UCS4 *field_new = self->field;
646 PyMem_Resize(field_new, Py_UCS4, field_size_new);
647 if (field_new == NULL) {
648 PyErr_NoMemory();
649 return 0;
650 }
651 self->field = field_new;
652 self->field_size = field_size_new;
653 return 1;
654 }
655
656 static int
parse_add_char(ReaderObj * self,_csvstate * module_state,Py_UCS4 c)657 parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
658 {
659 if (self->field_len >= module_state->field_limit) {
660 PyErr_Format(module_state->error_obj,
661 "field larger than field limit (%ld)",
662 module_state->field_limit);
663 return -1;
664 }
665 if (self->field_len == self->field_size && !parse_grow_buff(self))
666 return -1;
667 self->field[self->field_len++] = c;
668 return 0;
669 }
670
/* Advance the reader's state machine by one input character.
 *
 * `c` is either a character of the current input line or the EOL sentinel
 * that Reader_iternext() passes after the last character of each line.
 * Depending on self->state the character is stored in the current field,
 * terminates the field (parse_save_field()), or only changes the state.
 * A record is complete once the state is back at START_RECORD.
 *
 * Returns 0 on success and -1 on failure (a helper failed or the input is
 * malformed for the dialect).
 */
static int
parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == EOL)
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == EOL) {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore spaces at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* character following an escapechar outside a quoted field */
        if (c == '\n' || c=='\r') {
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = AFTER_ESCAPED_CRNL;
            break;
        }
        /* an escapechar at the very end of a line stores a newline */
        if (c == EOL)
            c = '\n';
        if (parse_add_char(self, module_state, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case AFTER_ESCAPED_CRNL:
        /* EOL right after an escaped line break: state is left unchanged,
           so the caller reads another line; anything else is handled as
           IN_FIELD */
        if (c == EOL)
            break;
        /*fallthru*/

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == EOL) {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == EOL)
            /* end of line inside quotes: stay in this state so the field
               continues on the next input line */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* character following an escapechar inside a quoted field */
        if (c == EOL)
            c = '\n';
        if (parse_add_char(self, module_state, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == EOL) {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* lenient mode: keep the stray character and continue as an
               unquoted field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* record already complete: only line-end characters may follow
           before the EOL sentinel */
        if (c == '\n' || c == '\r')
            ;
        else if (c == EOL)
            self->state = START_RECORD;
        else {
            PyErr_Format(module_state->error_obj,
                         "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
852
853 static int
parse_reset(ReaderObj * self)854 parse_reset(ReaderObj *self)
855 {
856 Py_XSETREF(self->fields, PyList_New(0));
857 if (self->fields == NULL)
858 return -1;
859 self->field_len = 0;
860 self->state = START_RECORD;
861 self->numeric_field = 0;
862 return 0;
863 }
864
865 static PyObject *
Reader_iternext(ReaderObj * self)866 Reader_iternext(ReaderObj *self)
867 {
868 PyObject *fields = NULL;
869 Py_UCS4 c;
870 Py_ssize_t pos, linelen;
871 unsigned int kind;
872 const void *data;
873 PyObject *lineobj;
874
875 _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
876 "Reader.__next__");
877 if (module_state == NULL) {
878 return NULL;
879 }
880
881 if (parse_reset(self) < 0)
882 return NULL;
883 do {
884 lineobj = PyIter_Next(self->input_iter);
885 if (lineobj == NULL) {
886 /* End of input OR exception */
887 if (!PyErr_Occurred() && (self->field_len != 0 ||
888 self->state == IN_QUOTED_FIELD)) {
889 if (self->dialect->strict)
890 PyErr_SetString(module_state->error_obj,
891 "unexpected end of data");
892 else if (parse_save_field(self) >= 0)
893 break;
894 }
895 return NULL;
896 }
897 if (!PyUnicode_Check(lineobj)) {
898 PyErr_Format(module_state->error_obj,
899 "iterator should return strings, "
900 "not %.200s "
901 "(the file should be opened in text mode)",
902 Py_TYPE(lineobj)->tp_name
903 );
904 Py_DECREF(lineobj);
905 return NULL;
906 }
907 if (PyUnicode_READY(lineobj) == -1) {
908 Py_DECREF(lineobj);
909 return NULL;
910 }
911 ++self->line_num;
912 kind = PyUnicode_KIND(lineobj);
913 data = PyUnicode_DATA(lineobj);
914 pos = 0;
915 linelen = PyUnicode_GET_LENGTH(lineobj);
916 while (linelen--) {
917 c = PyUnicode_READ(kind, data, pos);
918 if (parse_process_char(self, module_state, c) < 0) {
919 Py_DECREF(lineobj);
920 goto err;
921 }
922 pos++;
923 }
924 Py_DECREF(lineobj);
925 if (parse_process_char(self, module_state, EOL) < 0)
926 goto err;
927 } while (self->state != START_RECORD);
928
929 fields = self->fields;
930 self->fields = NULL;
931 err:
932 return fields;
933 }
934
935 static void
Reader_dealloc(ReaderObj * self)936 Reader_dealloc(ReaderObj *self)
937 {
938 PyTypeObject *tp = Py_TYPE(self);
939 PyObject_GC_UnTrack(self);
940 tp->tp_clear((PyObject *)self);
941 if (self->field != NULL) {
942 PyMem_Free(self->field);
943 self->field = NULL;
944 }
945 PyObject_GC_Del(self);
946 Py_DECREF(tp);
947 }
948
949 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)950 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
951 {
952 Py_VISIT(self->dialect);
953 Py_VISIT(self->input_iter);
954 Py_VISIT(self->fields);
955 Py_VISIT(Py_TYPE(self));
956 return 0;
957 }
958
959 static int
Reader_clear(ReaderObj * self)960 Reader_clear(ReaderObj *self)
961 {
962 Py_CLEAR(self->dialect);
963 Py_CLEAR(self->input_iter);
964 Py_CLEAR(self->fields);
965 return 0;
966 }
967
/* Docstring installed as the reader type's tp_doc. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
974
/* The reader type defines no methods of its own; the table holds only the
 * terminating sentinel. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of field `x` inside ReaderObj. */
#define R_OFF(x) offsetof(ReaderObj, x)
979
/* Read-only attributes exposed directly from ReaderObj: the dialect object
 * and the count of input lines consumed so far. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};
985
986
/* Slot table from which the reader heap type is built (see
 * Reader_Type_spec).  The reader is its own iterator (PyObject_SelfIter). */
static PyType_Slot Reader_Type_slots[] = {
    {Py_tp_doc, (char*)Reader_Type_doc},
    {Py_tp_traverse, Reader_traverse},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, Reader_iternext},
    {Py_tp_methods, Reader_methods},
    {Py_tp_members, Reader_memberlist},
    {Py_tp_clear, Reader_clear},
    {Py_tp_dealloc, Reader_dealloc},
    {0, NULL}
};
998
/* Type specification for _csv.reader.  Direct instantiation is disallowed
 * by flag; instances are created by csv_reader(). */
PyType_Spec Reader_Type_spec = {
    .name = "_csv.reader",
    .basicsize = sizeof(ReaderObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = Reader_Type_slots
};
1006
1007
1008 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)1009 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
1010 {
1011 PyObject * iterator, * dialect = NULL;
1012 _csvstate *module_state = get_csv_state(module);
1013 ReaderObj * self = PyObject_GC_New(
1014 ReaderObj,
1015 module_state->reader_type);
1016
1017 if (!self)
1018 return NULL;
1019
1020 self->dialect = NULL;
1021 self->fields = NULL;
1022 self->input_iter = NULL;
1023 self->field = NULL;
1024 self->field_size = 0;
1025 self->line_num = 0;
1026
1027 if (parse_reset(self) < 0) {
1028 Py_DECREF(self);
1029 return NULL;
1030 }
1031
1032 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
1033 Py_DECREF(self);
1034 return NULL;
1035 }
1036 self->input_iter = PyObject_GetIter(iterator);
1037 if (self->input_iter == NULL) {
1038 Py_DECREF(self);
1039 return NULL;
1040 }
1041 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1042 keyword_args);
1043 if (self->dialect == NULL) {
1044 Py_DECREF(self);
1045 return NULL;
1046 }
1047
1048 PyObject_GC_Track(self);
1049 return (PyObject *)self;
1050 }
1051
1052 /*
1053 * WRITER
1054 */
1055 /* ---------------------------------------------------------------- */
1056 static void
join_reset(WriterObj * self)1057 join_reset(WriterObj *self)
1058 {
1059 self->rec_len = 0;
1060 self->num_fields = 0;
1061 }
1062
/* Granularity, in Py_UCS4 elements, to which join_check_rec_size() rounds
 * the size of the writer's record buffer. */
#define MEM_INCR 32768
1064
1065 /* Calculate new record length or append field to record. Return new
1066 * record length.
1067 */
1068 static Py_ssize_t
join_append_data(WriterObj * self,unsigned int field_kind,const void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)1069 join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
1070 Py_ssize_t field_len, int *quoted,
1071 int copy_phase)
1072 {
1073 DialectObj *dialect = self->dialect;
1074 int i;
1075 Py_ssize_t rec_len;
1076
1077 #define INCLEN \
1078 do {\
1079 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1080 goto overflow; \
1081 } \
1082 rec_len++; \
1083 } while(0)
1084
1085 #define ADDCH(c) \
1086 do {\
1087 if (copy_phase) \
1088 self->rec[rec_len] = c;\
1089 INCLEN;\
1090 } while(0)
1091
1092 rec_len = self->rec_len;
1093
1094 /* If this is not the first field we need a field separator */
1095 if (self->num_fields > 0)
1096 ADDCH(dialect->delimiter);
1097
1098 /* Handle preceding quote */
1099 if (copy_phase && *quoted)
1100 ADDCH(dialect->quotechar);
1101
1102 /* Copy/count field data */
1103 /* If field is null just pass over */
1104 for (i = 0; field_data && (i < field_len); i++) {
1105 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1106 int want_escape = 0;
1107
1108 if (c == dialect->delimiter ||
1109 c == dialect->escapechar ||
1110 c == dialect->quotechar ||
1111 PyUnicode_FindChar(
1112 dialect->lineterminator, c, 0,
1113 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1114 if (dialect->quoting == QUOTE_NONE)
1115 want_escape = 1;
1116 else {
1117 if (c == dialect->quotechar) {
1118 if (dialect->doublequote)
1119 ADDCH(dialect->quotechar);
1120 else
1121 want_escape = 1;
1122 }
1123 else if (c == dialect->escapechar) {
1124 want_escape = 1;
1125 }
1126 if (!want_escape)
1127 *quoted = 1;
1128 }
1129 if (want_escape) {
1130 if (dialect->escapechar == NOT_SET) {
1131 PyErr_Format(self->error_obj,
1132 "need to escape, but no escapechar set");
1133 return -1;
1134 }
1135 ADDCH(dialect->escapechar);
1136 }
1137 }
1138 /* Copy field character into record buffer.
1139 */
1140 ADDCH(c);
1141 }
1142
1143 if (*quoted) {
1144 if (copy_phase)
1145 ADDCH(dialect->quotechar);
1146 else {
1147 INCLEN; /* starting quote */
1148 INCLEN; /* ending quote */
1149 }
1150 }
1151 return rec_len;
1152
1153 overflow:
1154 PyErr_NoMemory();
1155 return -1;
1156 #undef ADDCH
1157 #undef INCLEN
1158 }
1159
1160 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1161 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1162 {
1163 assert(rec_len >= 0);
1164
1165 if (rec_len > self->rec_size) {
1166 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1167 Py_UCS4 *rec_new = self->rec;
1168 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1169 if (rec_new == NULL) {
1170 PyErr_NoMemory();
1171 return 0;
1172 }
1173 self->rec = rec_new;
1174 self->rec_size = (Py_ssize_t)rec_size_new;
1175 }
1176 return 1;
1177 }
1178
1179 static int
join_append(WriterObj * self,PyObject * field,int quoted)1180 join_append(WriterObj *self, PyObject *field, int quoted)
1181 {
1182 unsigned int field_kind = -1;
1183 const void *field_data = NULL;
1184 Py_ssize_t field_len = 0;
1185 Py_ssize_t rec_len;
1186
1187 if (field != NULL) {
1188 if (PyUnicode_READY(field) == -1)
1189 return 0;
1190 field_kind = PyUnicode_KIND(field);
1191 field_data = PyUnicode_DATA(field);
1192 field_len = PyUnicode_GET_LENGTH(field);
1193 }
1194 rec_len = join_append_data(self, field_kind, field_data, field_len,
1195 "ed, 0);
1196 if (rec_len < 0)
1197 return 0;
1198
1199 /* grow record buffer if necessary */
1200 if (!join_check_rec_size(self, rec_len))
1201 return 0;
1202
1203 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1204 "ed, 1);
1205 self->num_fields++;
1206
1207 return 1;
1208 }
1209
1210 static int
join_append_lineterminator(WriterObj * self)1211 join_append_lineterminator(WriterObj *self)
1212 {
1213 Py_ssize_t terminator_len, i;
1214 unsigned int term_kind;
1215 const void *term_data;
1216
1217 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1218 if (terminator_len == -1)
1219 return 0;
1220
1221 /* grow record buffer if necessary */
1222 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1223 return 0;
1224
1225 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1226 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1227 for (i = 0; i < terminator_len; i++)
1228 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1229 self->rec_len += terminator_len;
1230
1231 return 1;
1232 }
1233
1234 PyDoc_STRVAR(csv_writerow_doc,
1235 "writerow(iterable)\n"
1236 "\n"
1237 "Construct and write a CSV record from an iterable of fields. Non-string\n"
1238 "elements will be converted to string.");
1239
1240 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1241 csv_writerow(WriterObj *self, PyObject *seq)
1242 {
1243 DialectObj *dialect = self->dialect;
1244 PyObject *iter, *field, *line, *result;
1245
1246 iter = PyObject_GetIter(seq);
1247 if (iter == NULL) {
1248 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1249 PyErr_Format(self->error_obj,
1250 "iterable expected, not %.200s",
1251 Py_TYPE(seq)->tp_name);
1252 }
1253 return NULL;
1254 }
1255
1256 /* Join all fields in internal buffer.
1257 */
1258 join_reset(self);
1259 while ((field = PyIter_Next(iter))) {
1260 int append_ok;
1261 int quoted;
1262
1263 switch (dialect->quoting) {
1264 case QUOTE_NONNUMERIC:
1265 quoted = !PyNumber_Check(field);
1266 break;
1267 case QUOTE_ALL:
1268 quoted = 1;
1269 break;
1270 default:
1271 quoted = 0;
1272 break;
1273 }
1274
1275 if (PyUnicode_Check(field)) {
1276 append_ok = join_append(self, field, quoted);
1277 Py_DECREF(field);
1278 }
1279 else if (field == Py_None) {
1280 append_ok = join_append(self, NULL, quoted);
1281 Py_DECREF(field);
1282 }
1283 else {
1284 PyObject *str;
1285
1286 str = PyObject_Str(field);
1287 Py_DECREF(field);
1288 if (str == NULL) {
1289 Py_DECREF(iter);
1290 return NULL;
1291 }
1292 append_ok = join_append(self, str, quoted);
1293 Py_DECREF(str);
1294 }
1295 if (!append_ok) {
1296 Py_DECREF(iter);
1297 return NULL;
1298 }
1299 }
1300 Py_DECREF(iter);
1301 if (PyErr_Occurred())
1302 return NULL;
1303
1304 if (self->num_fields > 0 && self->rec_len == 0) {
1305 if (dialect->quoting == QUOTE_NONE) {
1306 PyErr_Format(self->error_obj,
1307 "single empty field record must be quoted");
1308 return NULL;
1309 }
1310 self->num_fields--;
1311 if (!join_append(self, NULL, 1))
1312 return NULL;
1313 }
1314
1315 /* Add line terminator.
1316 */
1317 if (!join_append_lineterminator(self)) {
1318 return NULL;
1319 }
1320
1321 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1322 (void *) self->rec, self->rec_len);
1323 if (line == NULL) {
1324 return NULL;
1325 }
1326 result = PyObject_CallOneArg(self->write, line);
1327 Py_DECREF(line);
1328 return result;
1329 }
1330
1331 PyDoc_STRVAR(csv_writerows_doc,
1332 "writerows(iterable of iterables)\n"
1333 "\n"
1334 "Construct and write a series of iterables to a csv file. Non-string\n"
1335 "elements will be converted to string.");
1336
1337 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1338 csv_writerows(WriterObj *self, PyObject *seqseq)
1339 {
1340 PyObject *row_iter, *row_obj, *result;
1341
1342 row_iter = PyObject_GetIter(seqseq);
1343 if (row_iter == NULL) {
1344 return NULL;
1345 }
1346 while ((row_obj = PyIter_Next(row_iter))) {
1347 result = csv_writerow(self, row_obj);
1348 Py_DECREF(row_obj);
1349 if (!result) {
1350 Py_DECREF(row_iter);
1351 return NULL;
1352 }
1353 else
1354 Py_DECREF(result);
1355 }
1356 Py_DECREF(row_iter);
1357 if (PyErr_Occurred())
1358 return NULL;
1359 Py_RETURN_NONE;
1360 }
1361
1362 static struct PyMethodDef Writer_methods[] = {
1363 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1364 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1365 { NULL, NULL }
1366 };
1367
1368 #define W_OFF(x) offsetof(WriterObj, x)
1369
1370 static struct PyMemberDef Writer_memberlist[] = {
1371 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1372 { NULL }
1373 };
1374
/*
 * tp_traverse slot of the writer type: report every object reference held
 * by the instance to the cyclic garbage collector.
 *
 * Returns 0, or the first non-zero value returned by `visit` (Py_VISIT
 * returns from the function in that case).
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->write);
    Py_VISIT(self->error_obj);
    /* The instance also keeps its type alive (Writer_dealloc releases that
     * reference), so the type is reported as well. */
    Py_VISIT(Py_TYPE(self));
    return 0;
}
1384
/*
 * tp_clear slot of the writer type: drop the object references held by the
 * instance.  Py_CLEAR sets each member to NULL, so the function can safely
 * run more than once.  The record buffer (self->rec) is not touched here.
 *
 * Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->write);
    Py_CLEAR(self->error_obj);
    return 0;
}
1393
1394 static void
Writer_dealloc(WriterObj * self)1395 Writer_dealloc(WriterObj *self)
1396 {
1397 PyTypeObject *tp = Py_TYPE(self);
1398 PyObject_GC_UnTrack(self);
1399 tp->tp_clear((PyObject *)self);
1400 if (self->rec != NULL) {
1401 PyMem_Free(self->rec);
1402 }
1403 PyObject_GC_Del(self);
1404 Py_DECREF(tp);
1405 }
1406
1407 PyDoc_STRVAR(Writer_Type_doc,
1408 "CSV writer\n"
1409 "\n"
1410 "Writer objects are responsible for generating tabular data\n"
1411 "in CSV format from sequence input.\n"
1412 );
1413
1414 static PyType_Slot Writer_Type_slots[] = {
1415 {Py_tp_doc, (char*)Writer_Type_doc},
1416 {Py_tp_traverse, Writer_traverse},
1417 {Py_tp_clear, Writer_clear},
1418 {Py_tp_dealloc, Writer_dealloc},
1419 {Py_tp_methods, Writer_methods},
1420 {Py_tp_members, Writer_memberlist},
1421 {0, NULL}
1422 };
1423
/*
 * Type specification for writer objects.  csv_exec() turns it into a heap
 * type with PyType_FromModuleAndSpec() and publishes it as "Writer".
 *
 * Py_TPFLAGS_DISALLOW_INSTANTIATION: instances are not created by calling
 * the type; csv_writer() allocates them with PyObject_GC_New().
 *
 * NOTE(review): this object has external linkage; it looks like it is only
 * referenced from this file -- confirm whether it could be made static.
 */
PyType_Spec Writer_Type_spec = {
    .name = "_csv.writer",
    .basicsize = sizeof(WriterObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = Writer_Type_slots,
};
1431
1432
1433 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1434 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1435 {
1436 PyObject * output_file, * dialect = NULL;
1437 _csvstate *module_state = get_csv_state(module);
1438 WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
1439
1440 if (!self)
1441 return NULL;
1442
1443 self->dialect = NULL;
1444 self->write = NULL;
1445
1446 self->rec = NULL;
1447 self->rec_size = 0;
1448 self->rec_len = 0;
1449 self->num_fields = 0;
1450
1451 self->error_obj = Py_NewRef(module_state->error_obj);
1452
1453 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1454 Py_DECREF(self);
1455 return NULL;
1456 }
1457 if (_PyObject_LookupAttr(output_file,
1458 module_state->str_write,
1459 &self->write) < 0) {
1460 Py_DECREF(self);
1461 return NULL;
1462 }
1463 if (self->write == NULL || !PyCallable_Check(self->write)) {
1464 PyErr_SetString(PyExc_TypeError,
1465 "argument 1 must have a \"write\" method");
1466 Py_DECREF(self);
1467 return NULL;
1468 }
1469 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1470 keyword_args);
1471 if (self->dialect == NULL) {
1472 Py_DECREF(self);
1473 return NULL;
1474 }
1475 PyObject_GC_Track(self);
1476 return (PyObject *)self;
1477 }
1478
1479 /*
1480 * DIALECT REGISTRY
1481 */
1482
1483 /*[clinic input]
1484 _csv.list_dialects
1485
1486 Return a list of all known dialect names.
1487
1488 names = csv.list_dialects()
1489 [clinic start generated code]*/
1490
1491 static PyObject *
_csv_list_dialects_impl(PyObject * module)1492 _csv_list_dialects_impl(PyObject *module)
1493 /*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/
1494 {
1495 return PyDict_Keys(get_csv_state(module)->dialects);
1496 }
1497
1498 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1499 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1500 {
1501 PyObject *name_obj, *dialect_obj = NULL;
1502 _csvstate *module_state = get_csv_state(module);
1503 PyObject *dialect;
1504
1505 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1506 return NULL;
1507 if (!PyUnicode_Check(name_obj)) {
1508 PyErr_SetString(PyExc_TypeError,
1509 "dialect name must be a string");
1510 return NULL;
1511 }
1512 if (PyUnicode_READY(name_obj) == -1)
1513 return NULL;
1514 dialect = _call_dialect(module_state, dialect_obj, kwargs);
1515 if (dialect == NULL)
1516 return NULL;
1517 if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
1518 Py_DECREF(dialect);
1519 return NULL;
1520 }
1521 Py_DECREF(dialect);
1522 Py_RETURN_NONE;
1523 }
1524
1525
1526 /*[clinic input]
1527 _csv.unregister_dialect
1528
1529 name: object
1530
1531 Delete the name/dialect mapping associated with a string name.
1532
1533 csv.unregister_dialect(name)
1534 [clinic start generated code]*/
1535
1536 static PyObject *
_csv_unregister_dialect_impl(PyObject * module,PyObject * name)1537 _csv_unregister_dialect_impl(PyObject *module, PyObject *name)
1538 /*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/
1539 {
1540 _csvstate *module_state = get_csv_state(module);
1541 if (PyDict_DelItem(module_state->dialects, name) < 0) {
1542 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1543 PyErr_Format(module_state->error_obj, "unknown dialect");
1544 }
1545 return NULL;
1546 }
1547 Py_RETURN_NONE;
1548 }
1549
1550 /*[clinic input]
1551 _csv.get_dialect
1552
1553 name: object
1554
1555 Return the dialect instance associated with name.
1556
1557 dialect = csv.get_dialect(name)
1558 [clinic start generated code]*/
1559
1560 static PyObject *
_csv_get_dialect_impl(PyObject * module,PyObject * name)1561 _csv_get_dialect_impl(PyObject *module, PyObject *name)
1562 /*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/
1563 {
1564 return get_dialect_from_registry(name, get_csv_state(module));
1565 }
1566
1567 /*[clinic input]
1568 _csv.field_size_limit
1569
1570 new_limit: object = NULL
1571
1572 Sets an upper limit on parsed fields.
1573
1574 csv.field_size_limit([limit])
1575
1576 Returns old limit. If limit is not given, no new limit is set and
1577 the old limit is returned
1578 [clinic start generated code]*/
1579
1580 static PyObject *
_csv_field_size_limit_impl(PyObject * module,PyObject * new_limit)1581 _csv_field_size_limit_impl(PyObject *module, PyObject *new_limit)
1582 /*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/
1583 {
1584 _csvstate *module_state = get_csv_state(module);
1585 long old_limit = module_state->field_limit;
1586 if (new_limit != NULL) {
1587 if (!PyLong_CheckExact(new_limit)) {
1588 PyErr_Format(PyExc_TypeError,
1589 "limit must be an integer");
1590 return NULL;
1591 }
1592 module_state->field_limit = PyLong_AsLong(new_limit);
1593 if (module_state->field_limit == -1 && PyErr_Occurred()) {
1594 module_state->field_limit = old_limit;
1595 return NULL;
1596 }
1597 }
1598 return PyLong_FromLong(old_limit);
1599 }
1600
/* The exception type defines no slots of its own: the table holds only the
 * terminating sentinel. */
static PyType_Slot error_slots[] = {
    {0, NULL},
};

/*
 * Type specification for the module's exception class, "_csv.Error".
 * csv_exec() instantiates it with PyType_FromModuleAndSpec(), passing a
 * bases tuple that contains PyExc_Exception.
 *
 * NOTE(review): this object has external linkage; it looks like it is only
 * referenced from this file -- confirm whether it could be made static.
 */
PyType_Spec error_spec = {
    .name = "_csv.Error",
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
    .slots = error_slots,
};
1610
1611 /*
1612 * MODULE
1613 */
1614
/* Module docstring; referenced from the _csvmodule definition. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the\n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the\n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret spaces which\n"
" immediately follow a delimiter. It defaults to False, which\n"
" means that spaces immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should\n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape\n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1672
/* Docstring of the module-level reader() function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
1687
/* Docstring of the module-level writer() function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1701
1702 PyDoc_STRVAR(csv_register_dialect_doc,
1703 "Create a mapping from a string name to a dialect class.\n"
1704 " dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1705
1706 static struct PyMethodDef csv_methods[] = {
1707 { "reader", _PyCFunction_CAST(csv_reader),
1708 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1709 { "writer", _PyCFunction_CAST(csv_writer),
1710 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1711 { "register_dialect", _PyCFunction_CAST(csv_register_dialect),
1712 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1713 _CSV_LIST_DIALECTS_METHODDEF
1714 _CSV_UNREGISTER_DIALECT_METHODDEF
1715 _CSV_GET_DIALECT_METHODDEF
1716 _CSV_FIELD_SIZE_LIMIT_METHODDEF
1717 { NULL, NULL }
1718 };
1719
1720 static int
csv_exec(PyObject * module)1721 csv_exec(PyObject *module) {
1722 const StyleDesc *style;
1723 PyObject *temp;
1724 _csvstate *module_state = get_csv_state(module);
1725
1726 temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1727 module_state->dialect_type = (PyTypeObject *)temp;
1728 if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1729 return -1;
1730 }
1731
1732 temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1733 module_state->reader_type = (PyTypeObject *)temp;
1734 if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1735 return -1;
1736 }
1737
1738 temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1739 module_state->writer_type = (PyTypeObject *)temp;
1740 if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1741 return -1;
1742 }
1743
1744 /* Add version to the module. */
1745 if (PyModule_AddStringConstant(module, "__version__",
1746 MODULE_VERSION) == -1) {
1747 return -1;
1748 }
1749
1750 /* Set the field limit */
1751 module_state->field_limit = 128 * 1024;
1752
1753 /* Add _dialects dictionary */
1754 module_state->dialects = PyDict_New();
1755 if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1756 return -1;
1757 }
1758
1759 /* Add quote styles into dictionary */
1760 for (style = quote_styles; style->name; style++) {
1761 if (PyModule_AddIntConstant(module, style->name,
1762 style->style) == -1)
1763 return -1;
1764 }
1765
1766 /* Add the CSV exception object to the module. */
1767 PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1768 if (bases == NULL) {
1769 return -1;
1770 }
1771 module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1772 bases);
1773 Py_DECREF(bases);
1774 if (module_state->error_obj == NULL) {
1775 return -1;
1776 }
1777 if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1778 return -1;
1779 }
1780
1781 module_state->str_write = PyUnicode_InternFromString("write");
1782 if (module_state->str_write == NULL) {
1783 return -1;
1784 }
1785 return 0;
1786 }
1787
1788 static PyModuleDef_Slot csv_slots[] = {
1789 {Py_mod_exec, csv_exec},
1790 {0, NULL}
1791 };
1792
1793 static struct PyModuleDef _csvmodule = {
1794 PyModuleDef_HEAD_INIT,
1795 "_csv",
1796 csv_module_doc,
1797 sizeof(_csvstate),
1798 csv_methods,
1799 csv_slots,
1800 _csv_traverse,
1801 _csv_clear,
1802 _csv_free
1803 };
1804
/*
 * Import entry point of the extension module.  It only hands the module
 * definition to PyModuleDef_Init(); the module contents are set up by
 * csv_exec(), registered through the Py_mod_exec slot in csv_slots.
 */
PyMODINIT_FUNC
PyInit__csv(void)
{
    return PyModuleDef_Init(&_csvmodule);
}
1810