#include "Python.h"
#include "../Parser/tokenizer.h"

static struct PyModuleDef _tokenizemodule;

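/* Per-module state: holds the heap-allocated TokenizerIter type so the
   extension works with multiple module instances and can be cleanly freed. */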
typedef struct {
    PyTypeObject *TokenizerIter;
} tokenize_state;

static tokenize_state *
get_tokenize_state(PyObject *module) {
    return (tokenize_state *)PyModule_GetState(module);
}

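/* Fetch the module state starting from a heap type defined by _tokenizemodule. */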
#define _tokenize_get_state_by_type(type) \
    get_tokenize_state(PyType_GetModuleByDef(type, &_tokenizemodule))

#include "clinic/Python-tokenize.c.h"

/*[clinic input]
module _tokenizer
class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/

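/* Instance of _tokenize.TokenizerIter: owns the C tokenizer state used to
   produce tokens for a single source string. */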
typedef struct {
    PyObject_HEAD
    struct tok_state *tok;
} tokenizeriterobject;

/*[clinic input]
@classmethod
_tokenizer.tokenizeriter.__new__ as tokenizeriter_new

    source: str
[clinic start generated code]*/

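/* Create a new TokenizerIter.  The tokenizer reads from the in-memory UTF-8
   source string and reports "<string>" as the filename. */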
static PyObject *
tokenizeriter_new_impl(PyTypeObject *type, const char *source)
/*[clinic end generated code: output=7fd9f46cf9263cbb input=4384b368407375c6]*/
{
    tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0);
    if (self == NULL) {
        return NULL;
    }
    PyObject *filename = PyUnicode_FromString("<string>");
    if (filename == NULL) {
        Py_DECREF(self);  /* don't leak the half-built iterator on error */
        return NULL;
    }
    self->tok = _PyTokenizer_FromUTF8(source, 1);
    if (self->tok == NULL) {
        Py_DECREF(filename);
        Py_DECREF(self);  /* don't leak the half-built iterator on error */
        return NULL;
    }
    self->tok->filename = filename;
    return (PyObject *)self;
}

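/* Produce the next token as a (string, type, lineno, end_lineno, col_offset,
   end_col_offset, line) tuple, or raise StopIteration at end of input. */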
static PyObject *
tokenizeriter_next(tokenizeriterobject *it)
{
    const char *start;
    const char *end;
    int type = _PyTokenizer_Get(it->tok, &start, &end);
    if (type == ERRORTOKEN && PyErr_Occurred()) {
        return NULL;
    }
    if (type == ERRORTOKEN || type == ENDMARKER) {
        PyErr_SetString(PyExc_StopIteration, "EOF");
        return NULL;
    }
    PyObject *str = NULL;
    if (start == NULL || end == NULL) {
        str = PyUnicode_FromString("");
    }
    else {
        str = PyUnicode_FromStringAndSize(start, end - start);
    }
    if (str == NULL) {
        return NULL;
    }

    Py_ssize_t size = it->tok->inp - it->tok->buf;
    PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
    if (line == NULL) {
        Py_DECREF(str);
        return NULL;
    }
    /* STRING tokens can span multiple lines, so use the recorded start of the
       string instead of the line the tokenizer is currently pointing at. */
    const char *line_start = type == STRING ? it->tok->multi_line_start : it->tok->line_start;
    int lineno = type == STRING ? it->tok->first_lineno : it->tok->lineno;
    int end_lineno = it->tok->lineno;
    int col_offset = -1;
    int end_col_offset = -1;
    if (start != NULL && start >= line_start) {
        col_offset = (int)(start - line_start);
    }
    if (end != NULL && end >= it->tok->line_start) {
        end_col_offset = (int)(end - it->tok->line_start);
    }

    return Py_BuildValue("(NiiiiiN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line);
}

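/* Free the C tokenizer state and the instance itself; heap types hold a
   strong reference to their type, so release that reference as well. */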
static void
tokenizeriter_dealloc(tokenizeriterobject *it)
{
    PyTypeObject *tp = Py_TYPE(it);
    if (it->tok != NULL) {  /* tok is NULL if __new__ failed part-way through */
        _PyTokenizer_Free(it->tok);
    }
    tp->tp_free(it);
    Py_DECREF(tp);
}

static PyType_Slot tokenizeriter_slots[] = {
    {Py_tp_new, tokenizeriter_new},
    {Py_tp_dealloc, tokenizeriter_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, tokenizeriter_next},
    {0, NULL},
};

static PyType_Spec tokenizeriter_spec = {
    .name = "_tokenize.TokenizerIter",
    .basicsize = sizeof(tokenizeriterobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = tokenizeriter_slots,
};

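/* Module exec slot: create the TokenizerIter heap type from its spec and
   expose it both in the module state and as a module attribute. */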
static int
tokenizemodule_exec(PyObject *m)
{
    tokenize_state *state = get_tokenize_state(m);
    if (state == NULL) {
        return -1;
    }

    state->TokenizerIter = (PyTypeObject *)PyType_FromModuleAndSpec(m, &tokenizeriter_spec, NULL);
    if (state->TokenizerIter == NULL) {
        return -1;
    }
    if (PyModule_AddType(m, state->TokenizerIter) < 0) {
        return -1;
    }

    return 0;
}

static PyMethodDef tokenize_methods[] = {
    {NULL, NULL, 0, NULL}  /* Sentinel */
};

static PyModuleDef_Slot tokenizemodule_slots[] = {
    {Py_mod_exec, tokenizemodule_exec},
    {0, NULL}
};

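/* GC support for the module object: the only object reference held in the
   module state is the TokenizerIter type. */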
static int
tokenizemodule_traverse(PyObject *m, visitproc visit, void *arg)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_VISIT(state->TokenizerIter);
    return 0;
}

static int
tokenizemodule_clear(PyObject *m)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_CLEAR(state->TokenizerIter);
    return 0;
}

static void
tokenizemodule_free(void *m)
{
    tokenizemodule_clear((PyObject *)m);
}

static struct PyModuleDef _tokenizemodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_tokenize",
    .m_size = sizeof(tokenize_state),
    .m_slots = tokenizemodule_slots,
    .m_methods = tokenize_methods,
    .m_traverse = tokenizemodule_traverse,
    .m_clear = tokenizemodule_clear,
    .m_free = tokenizemodule_free,
};

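/* Multi-phase initialization: the module object itself is created later from
   the slots above, so this only returns the module definition. */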
PyMODINIT_FUNC
PyInit__tokenize(void)
{
    return PyModuleDef_Init(&_tokenizemodule);
}