1 /* MD5 module */
2 
3 /* This module provides an interface to the MD5 algorithm */
4 
5 /* See below for information about the original code this module was
6    based upon. Additional work performed by:
7 
8    Andrew Kuchling ([email protected])
9    Greg Stein ([email protected])
10    Trevor Perrin ([email protected])
11 
12    Copyright (C) 2005-2007   Gregory P. Smith ([email protected])
13    Licensed to PSF under a Contributor Agreement.
14 
15 */
16 
17 /* MD5 objects */
18 #ifndef Py_BUILD_CORE_BUILTIN
19 #  define Py_BUILD_CORE_MODULE 1
20 #endif
21 
22 #include "Python.h"
23 #include "hashlib.h"
24 #include "pycore_strhex.h"        // _Py_strhex()
25 
26 /*[clinic input]
27 module _md5
28 class MD5Type "MD5object *" "&PyType_Type"
29 [clinic start generated code]*/
30 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/
31 
/* Integer types used by the MD5 implementation */

#if SIZEOF_INT == 4
typedef unsigned int MD5_INT32; /* 32-bit unsigned integer */
/* 64-bit unsigned integer used for the message length in bits.  It is
   unsigned so that the counter wraps modulo 2**64 instead of running into
   signed-overflow undefined behaviour, and so that the right shifts
   performed by STORE64L are fully defined. */
typedef unsigned long long MD5_INT64;
#else
/* MD5_INT32 and MD5_INT64 are intentionally left undefined here, so
   compilation will fail on platforms where int is not 4 bytes wide. */
#endif
40 
/* Sizes used by the MD5 implementation, in bytes */

#define MD5_DIGESTSIZE   16   /* length of the message digest */
#define MD5_BLOCKSIZE    64   /* length of one input block */
45 
46 /* The structure for storing MD5 info */
47 
48 struct md5_state {
49     MD5_INT64 length;
50     MD5_INT32 state[4], curlen;
51     unsigned char buf[MD5_BLOCKSIZE];
52 };
53 
54 typedef struct {
55     PyObject_HEAD
56 
57     struct md5_state hash_state;
58 } MD5object;
59 
60 #include "clinic/md5module.c.h"
61 
62 /* ------------------------------------------------------------------------
63  *
64  * This code for the MD5 algorithm was noted as public domain. The
65  * original headers are pasted below.
66  *
67  * Several changes have been made to make it more compatible with the
68  * Python environment and desired interface.
69  *
70  */
71 
72 /* LibTomCrypt, modular cryptographic library -- Tom St Denis
73  *
74  * LibTomCrypt is a library that provides various cryptographic
75  * algorithms in a highly modular and flexible manner.
76  *
77  * The library is free for all purposes without any express
78  * guarantee it works.
79  *
80  * Tom St Denis, [email protected], https://www.libtom.net
81  */
82 
/* Rotate the low 32 bits of x left by y bits (y is taken modulo 32) and
   yield the result as an unsigned long confined to 32 bits.

   The right-shift count is masked as well, so a rotation by a multiple of
   32 shifts by 0 rather than by 32; shifting a 32-bit unsigned long by its
   full width would be undefined behaviour.

   Done the hard way (platform optimizations could be done).  Both
   arguments are evaluated more than once. */
#define ROLc(x, y) \
    ( ( ((unsigned long)(x) << (unsigned long)((y) & 31)) | \
        (((unsigned long)(x) & 0xFFFFFFFFUL) >> \
             (unsigned long)((32 - ((y) & 31)) & 31)) ) \
      & 0xFFFFFFFFUL )
85 
/* Endian-neutral macros that work on all platforms.

   They move integers to and from little-endian byte strings: x is the
   integer (an assignable lvalue for LOAD32L) and y points at the bytes.
   Each macro expands to exactly one statement, so it can be used as the
   body of an unbraced if/else.  Arguments are evaluated several times and
   must therefore be free of side effects. */

/* Write the low 32 bits of x to y[0..3], least significant byte first. */
#define STORE32L(x, y)                                    \
    do {                                                  \
        (y)[0] = (unsigned char)((x) & 255);              \
        (y)[1] = (unsigned char)(((x) >> 8) & 255);       \
        (y)[2] = (unsigned char)(((x) >> 16) & 255);      \
        (y)[3] = (unsigned char)(((x) >> 24) & 255);      \
    } while (0)

/* Read y[0..3] as a little-endian 32-bit value and assign it to x. */
#define LOAD32L(x, y)                                     \
    do {                                                  \
        (x) = ((unsigned long)((y)[3] & 255) << 24) |     \
              ((unsigned long)((y)[2] & 255) << 16) |     \
              ((unsigned long)((y)[1] & 255) << 8)  |     \
              ((unsigned long)((y)[0] & 255));            \
    } while (0)

/* Write the low 64 bits of x to y[0..7], least significant byte first. */
#define STORE64L(x, y)                                    \
    do {                                                  \
        (y)[0] = (unsigned char)((x) & 255);              \
        (y)[1] = (unsigned char)(((x) >> 8) & 255);       \
        (y)[2] = (unsigned char)(((x) >> 16) & 255);      \
        (y)[3] = (unsigned char)(((x) >> 24) & 255);      \
        (y)[4] = (unsigned char)(((x) >> 32) & 255);      \
        (y)[5] = (unsigned char)(((x) >> 40) & 255);      \
        (y)[6] = (unsigned char)(((x) >> 48) & 255);      \
        (y)[7] = (unsigned char)(((x) >> 56) & 255);      \
    } while (0)
103 
104 
/* MD5 macros */

/* The four bitwise mixing functions, one per round.  Every argument is
   parenthesized so that compound expressions can be passed safely; each
   argument may be evaluated more than once. */
#define F(x,y,z)  ((z) ^ ((x) & ((y) ^ (z))))
#define G(x,y,z)  ((y) ^ ((z) & ((y) ^ (x))))
#define H(x,y,z)  ((x) ^ (y) ^ (z))
#define I(x,y,z)  ((y) ^ ((x) | (~(z))))

/* One step of the corresponding round: mix b, c, d into a together with
   the message word M and the constant t, rotate a left by s bits and add b.
   a must be an assignable variable.

   Each of these expands to TWO complete statements including the final
   semicolon: call sites are written without a trailing semicolon and must
   not be used as the body of an unbraced if/else or loop. */
#define FF(a,b,c,d,M,s,t) \
    a = (a + F(b,c,d) + (M) + (t)); a = ROLc(a, s) + (b);

#define GG(a,b,c,d,M,s,t) \
    a = (a + G(b,c,d) + (M) + (t)); a = ROLc(a, s) + (b);

#define HH(a,b,c,d,M,s,t) \
    a = (a + H(b,c,d) + (M) + (t)); a = ROLc(a, s) + (b);

#define II(a,b,c,d,M,s,t) \
    a = (a + I(b,c,d) + (M) + (t)); a = ROLc(a, s) + (b);
123 
124 
md5_compress(struct md5_state * md5,const unsigned char * buf)125 static void md5_compress(struct md5_state *md5, const unsigned char *buf)
126 {
127     MD5_INT32 i, W[16], a, b, c, d;
128 
129     assert(md5 != NULL);
130     assert(buf != NULL);
131 
132     /* copy the state into 512-bits into W[0..15] */
133     for (i = 0; i < 16; i++) {
134         LOAD32L(W[i], buf + (4*i));
135     }
136 
137     /* copy state */
138     a = md5->state[0];
139     b = md5->state[1];
140     c = md5->state[2];
141     d = md5->state[3];
142 
143     FF(a,b,c,d,W[0],7,0xd76aa478UL)
144     FF(d,a,b,c,W[1],12,0xe8c7b756UL)
145     FF(c,d,a,b,W[2],17,0x242070dbUL)
146     FF(b,c,d,a,W[3],22,0xc1bdceeeUL)
147     FF(a,b,c,d,W[4],7,0xf57c0fafUL)
148     FF(d,a,b,c,W[5],12,0x4787c62aUL)
149     FF(c,d,a,b,W[6],17,0xa8304613UL)
150     FF(b,c,d,a,W[7],22,0xfd469501UL)
151     FF(a,b,c,d,W[8],7,0x698098d8UL)
152     FF(d,a,b,c,W[9],12,0x8b44f7afUL)
153     FF(c,d,a,b,W[10],17,0xffff5bb1UL)
154     FF(b,c,d,a,W[11],22,0x895cd7beUL)
155     FF(a,b,c,d,W[12],7,0x6b901122UL)
156     FF(d,a,b,c,W[13],12,0xfd987193UL)
157     FF(c,d,a,b,W[14],17,0xa679438eUL)
158     FF(b,c,d,a,W[15],22,0x49b40821UL)
159     GG(a,b,c,d,W[1],5,0xf61e2562UL)
160     GG(d,a,b,c,W[6],9,0xc040b340UL)
161     GG(c,d,a,b,W[11],14,0x265e5a51UL)
162     GG(b,c,d,a,W[0],20,0xe9b6c7aaUL)
163     GG(a,b,c,d,W[5],5,0xd62f105dUL)
164     GG(d,a,b,c,W[10],9,0x02441453UL)
165     GG(c,d,a,b,W[15],14,0xd8a1e681UL)
166     GG(b,c,d,a,W[4],20,0xe7d3fbc8UL)
167     GG(a,b,c,d,W[9],5,0x21e1cde6UL)
168     GG(d,a,b,c,W[14],9,0xc33707d6UL)
169     GG(c,d,a,b,W[3],14,0xf4d50d87UL)
170     GG(b,c,d,a,W[8],20,0x455a14edUL)
171     GG(a,b,c,d,W[13],5,0xa9e3e905UL)
172     GG(d,a,b,c,W[2],9,0xfcefa3f8UL)
173     GG(c,d,a,b,W[7],14,0x676f02d9UL)
174     GG(b,c,d,a,W[12],20,0x8d2a4c8aUL)
175     HH(a,b,c,d,W[5],4,0xfffa3942UL)
176     HH(d,a,b,c,W[8],11,0x8771f681UL)
177     HH(c,d,a,b,W[11],16,0x6d9d6122UL)
178     HH(b,c,d,a,W[14],23,0xfde5380cUL)
179     HH(a,b,c,d,W[1],4,0xa4beea44UL)
180     HH(d,a,b,c,W[4],11,0x4bdecfa9UL)
181     HH(c,d,a,b,W[7],16,0xf6bb4b60UL)
182     HH(b,c,d,a,W[10],23,0xbebfbc70UL)
183     HH(a,b,c,d,W[13],4,0x289b7ec6UL)
184     HH(d,a,b,c,W[0],11,0xeaa127faUL)
185     HH(c,d,a,b,W[3],16,0xd4ef3085UL)
186     HH(b,c,d,a,W[6],23,0x04881d05UL)
187     HH(a,b,c,d,W[9],4,0xd9d4d039UL)
188     HH(d,a,b,c,W[12],11,0xe6db99e5UL)
189     HH(c,d,a,b,W[15],16,0x1fa27cf8UL)
190     HH(b,c,d,a,W[2],23,0xc4ac5665UL)
191     II(a,b,c,d,W[0],6,0xf4292244UL)
192     II(d,a,b,c,W[7],10,0x432aff97UL)
193     II(c,d,a,b,W[14],15,0xab9423a7UL)
194     II(b,c,d,a,W[5],21,0xfc93a039UL)
195     II(a,b,c,d,W[12],6,0x655b59c3UL)
196     II(d,a,b,c,W[3],10,0x8f0ccc92UL)
197     II(c,d,a,b,W[10],15,0xffeff47dUL)
198     II(b,c,d,a,W[1],21,0x85845dd1UL)
199     II(a,b,c,d,W[8],6,0x6fa87e4fUL)
200     II(d,a,b,c,W[15],10,0xfe2ce6e0UL)
201     II(c,d,a,b,W[6],15,0xa3014314UL)
202     II(b,c,d,a,W[13],21,0x4e0811a1UL)
203     II(a,b,c,d,W[4],6,0xf7537e82UL)
204     II(d,a,b,c,W[11],10,0xbd3af235UL)
205     II(c,d,a,b,W[2],15,0x2ad7d2bbUL)
206     II(b,c,d,a,W[9],21,0xeb86d391UL)
207 
208     md5->state[0] = md5->state[0] + a;
209     md5->state[1] = md5->state[1] + b;
210     md5->state[2] = md5->state[2] + c;
211     md5->state[3] = md5->state[3] + d;
212 }
213 
214 
215 /**
216    Initialize the hash state
217    @param md5   The hash state you wish to initialize
218 */
219 static void
md5_init(struct md5_state * md5)220 md5_init(struct md5_state *md5)
221 {
222     assert(md5 != NULL);
223     md5->state[0] = 0x67452301UL;
224     md5->state[1] = 0xefcdab89UL;
225     md5->state[2] = 0x98badcfeUL;
226     md5->state[3] = 0x10325476UL;
227     md5->curlen = 0;
228     md5->length = 0;
229 }
230 
231 /**
232    Process a block of memory though the hash
233    @param md5   The hash state
234    @param in     The data to hash
235    @param inlen  The length of the data (octets)
236 */
237 static void
md5_process(struct md5_state * md5,const unsigned char * in,Py_ssize_t inlen)238 md5_process(struct md5_state *md5, const unsigned char *in, Py_ssize_t inlen)
239 {
240     Py_ssize_t n;
241 
242     assert(md5 != NULL);
243     assert(in != NULL);
244     assert(md5->curlen <= sizeof(md5->buf));
245 
246     while (inlen > 0) {
247         if (md5->curlen == 0 && inlen >= MD5_BLOCKSIZE) {
248            md5_compress(md5, in);
249            md5->length    += MD5_BLOCKSIZE * 8;
250            in             += MD5_BLOCKSIZE;
251            inlen          -= MD5_BLOCKSIZE;
252         } else {
253            n = Py_MIN(inlen, (Py_ssize_t)(MD5_BLOCKSIZE - md5->curlen));
254            memcpy(md5->buf + md5->curlen, in, (size_t)n);
255            md5->curlen    += (MD5_INT32)n;
256            in             += n;
257            inlen          -= n;
258            if (md5->curlen == MD5_BLOCKSIZE) {
259               md5_compress(md5, md5->buf);
260               md5->length += 8*MD5_BLOCKSIZE;
261               md5->curlen = 0;
262            }
263        }
264     }
265 }
266 
267 /**
268    Terminate the hash to get the digest
269    @param md5  The hash state
270    @param out [out] The destination of the hash (16 bytes)
271 */
272 static void
md5_done(struct md5_state * md5,unsigned char * out)273 md5_done(struct md5_state *md5, unsigned char *out)
274 {
275     int i;
276 
277     assert(md5 != NULL);
278     assert(out != NULL);
279     assert(md5->curlen < sizeof(md5->buf));
280 
281     /* increase the length of the message */
282     md5->length += md5->curlen * 8;
283 
284     /* append the '1' bit */
285     md5->buf[md5->curlen++] = (unsigned char)0x80;
286 
287     /* if the length is currently above 56 bytes we append zeros
288      * then compress.  Then we can fall back to padding zeros and length
289      * encoding like normal.
290      */
291     if (md5->curlen > 56) {
292         while (md5->curlen < 64) {
293             md5->buf[md5->curlen++] = (unsigned char)0;
294         }
295         md5_compress(md5, md5->buf);
296         md5->curlen = 0;
297     }
298 
299     /* pad up to 56 bytes of zeroes */
300     while (md5->curlen < 56) {
301         md5->buf[md5->curlen++] = (unsigned char)0;
302     }
303 
304     /* store length */
305     STORE64L(md5->length, md5->buf+56);
306     md5_compress(md5, md5->buf);
307 
308     /* copy output */
309     for (i = 0; i < 4; i++) {
310         STORE32L(md5->state[i], out+(4*i));
311     }
312 }
313 
314 /* .Source: /cvs/libtom/libtomcrypt/src/hashes/md5.c,v $ */
315 /* .Revision: 1.10 $ */
316 /* .Date: 2007/05/12 14:25:28 $ */
317 
318 /*
319  * End of copied MD5 code.
320  *
321  * ------------------------------------------------------------------------
322  */
323 
324 typedef struct {
325     PyTypeObject* md5_type;
326 } MD5State;
327 
328 static inline MD5State*
md5_get_state(PyObject * module)329 md5_get_state(PyObject *module)
330 {
331     void *state = PyModule_GetState(module);
332     assert(state != NULL);
333     return (MD5State *)state;
334 }
335 
336 static MD5object *
newMD5object(MD5State * st)337 newMD5object(MD5State * st)
338 {
339     MD5object *md5 = (MD5object *)PyObject_GC_New(MD5object, st->md5_type);
340     PyObject_GC_Track(md5);
341     return md5;
342 }
343 
344 /* Internal methods for a hash object */
345 static int
MD5_traverse(PyObject * ptr,visitproc visit,void * arg)346 MD5_traverse(PyObject *ptr, visitproc visit, void *arg)
347 {
348     Py_VISIT(Py_TYPE(ptr));
349     return 0;
350 }
351 
352 static void
MD5_dealloc(PyObject * ptr)353 MD5_dealloc(PyObject *ptr)
354 {
355     PyTypeObject *tp = Py_TYPE(ptr);
356     PyObject_GC_UnTrack(ptr);
357     PyObject_GC_Del(ptr);
358     Py_DECREF(tp);
359 }
360 
361 
362 /* External methods for a hash object */
363 
364 /*[clinic input]
365 MD5Type.copy
366 
367     cls: defining_class
368 
369 Return a copy of the hash object.
370 [clinic start generated code]*/
371 
372 static PyObject *
MD5Type_copy_impl(MD5object * self,PyTypeObject * cls)373 MD5Type_copy_impl(MD5object *self, PyTypeObject *cls)
374 /*[clinic end generated code: output=bf055e08244bf5ee input=d89087dcfb2a8620]*/
375 {
376     MD5State *st = PyType_GetModuleState(cls);
377 
378     MD5object *newobj;
379     if ((newobj = newMD5object(st))==NULL)
380         return NULL;
381 
382     newobj->hash_state = self->hash_state;
383     return (PyObject *)newobj;
384 }
385 
386 /*[clinic input]
387 MD5Type.digest
388 
389 Return the digest value as a bytes object.
390 [clinic start generated code]*/
391 
392 static PyObject *
MD5Type_digest_impl(MD5object * self)393 MD5Type_digest_impl(MD5object *self)
394 /*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/
395 {
396     unsigned char digest[MD5_DIGESTSIZE];
397     struct md5_state temp;
398 
399     temp = self->hash_state;
400     md5_done(&temp, digest);
401     return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE);
402 }
403 
404 /*[clinic input]
405 MD5Type.hexdigest
406 
407 Return the digest value as a string of hexadecimal digits.
408 [clinic start generated code]*/
409 
410 static PyObject *
MD5Type_hexdigest_impl(MD5object * self)411 MD5Type_hexdigest_impl(MD5object *self)
412 /*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/
413 {
414     unsigned char digest[MD5_DIGESTSIZE];
415     struct md5_state temp;
416 
417     /* Get the raw (binary) digest value */
418     temp = self->hash_state;
419     md5_done(&temp, digest);
420 
421     return _Py_strhex((const char*)digest, MD5_DIGESTSIZE);
422 }
423 
424 /*[clinic input]
425 MD5Type.update
426 
427     obj: object
428     /
429 
430 Update this hash object's state with the provided string.
431 [clinic start generated code]*/
432 
433 static PyObject *
MD5Type_update(MD5object * self,PyObject * obj)434 MD5Type_update(MD5object *self, PyObject *obj)
435 /*[clinic end generated code: output=f6ad168416338423 input=6e1efcd9ecf17032]*/
436 {
437     Py_buffer buf;
438 
439     GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
440 
441     md5_process(&self->hash_state, buf.buf, buf.len);
442 
443     PyBuffer_Release(&buf);
444     Py_RETURN_NONE;
445 }
446 
447 static PyMethodDef MD5_methods[] = {
448     MD5TYPE_COPY_METHODDEF
449     MD5TYPE_DIGEST_METHODDEF
450     MD5TYPE_HEXDIGEST_METHODDEF
451     MD5TYPE_UPDATE_METHODDEF
452     {NULL,        NULL}         /* sentinel */
453 };
454 
455 static PyObject *
MD5_get_block_size(PyObject * self,void * closure)456 MD5_get_block_size(PyObject *self, void *closure)
457 {
458     return PyLong_FromLong(MD5_BLOCKSIZE);
459 }
460 
461 static PyObject *
MD5_get_name(PyObject * self,void * closure)462 MD5_get_name(PyObject *self, void *closure)
463 {
464     return PyUnicode_FromStringAndSize("md5", 3);
465 }
466 
467 static PyObject *
md5_get_digest_size(PyObject * self,void * closure)468 md5_get_digest_size(PyObject *self, void *closure)
469 {
470     return PyLong_FromLong(MD5_DIGESTSIZE);
471 }
472 
473 static PyGetSetDef MD5_getseters[] = {
474     {"block_size",
475      (getter)MD5_get_block_size, NULL,
476      NULL,
477      NULL},
478     {"name",
479      (getter)MD5_get_name, NULL,
480      NULL,
481      NULL},
482     {"digest_size",
483      (getter)md5_get_digest_size, NULL,
484      NULL,
485      NULL},
486     {NULL}  /* Sentinel */
487 };
488 
489 static PyType_Slot md5_type_slots[] = {
490     {Py_tp_dealloc, MD5_dealloc},
491     {Py_tp_methods, MD5_methods},
492     {Py_tp_getset, MD5_getseters},
493     {Py_tp_traverse, MD5_traverse},
494     {0,0}
495 };
496 
497 static PyType_Spec md5_type_spec = {
498     .name = "_md5.md5",
499     .basicsize =  sizeof(MD5object),
500     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION |
501               Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_HAVE_GC),
502     .slots = md5_type_slots
503 };
504 
/* The single module-level function: md5() */
506 
507 /*[clinic input]
508 _md5.md5
509 
510     string: object(c_default="NULL") = b''
511     *
512     usedforsecurity: bool = True
513 
514 Return a new MD5 hash object; optionally initialized with a string.
515 [clinic start generated code]*/
516 
517 static PyObject *
_md5_md5_impl(PyObject * module,PyObject * string,int usedforsecurity)518 _md5_md5_impl(PyObject *module, PyObject *string, int usedforsecurity)
519 /*[clinic end generated code: output=587071f76254a4ac input=7a144a1905636985]*/
520 {
521     MD5object *new;
522     Py_buffer buf;
523 
524     if (string)
525         GET_BUFFER_VIEW_OR_ERROUT(string, &buf);
526 
527     MD5State *st = md5_get_state(module);
528     if ((new = newMD5object(st)) == NULL) {
529         if (string)
530             PyBuffer_Release(&buf);
531         return NULL;
532     }
533 
534     md5_init(&new->hash_state);
535 
536     if (PyErr_Occurred()) {
537         Py_DECREF(new);
538         if (string)
539             PyBuffer_Release(&buf);
540         return NULL;
541     }
542     if (string) {
543         md5_process(&new->hash_state, buf.buf, buf.len);
544         PyBuffer_Release(&buf);
545     }
546 
547     return (PyObject *)new;
548 }
549 
550 
551 /* List of functions exported by this module */
552 
553 static struct PyMethodDef MD5_functions[] = {
554     _MD5_MD5_METHODDEF
555     {NULL,      NULL}            /* Sentinel */
556 };
557 
558 static int
_md5_traverse(PyObject * module,visitproc visit,void * arg)559 _md5_traverse(PyObject *module, visitproc visit, void *arg)
560 {
561     MD5State *state = md5_get_state(module);
562     Py_VISIT(state->md5_type);
563     return 0;
564 }
565 
566 static int
_md5_clear(PyObject * module)567 _md5_clear(PyObject *module)
568 {
569     MD5State *state = md5_get_state(module);
570     Py_CLEAR(state->md5_type);
571     return 0;
572 }
573 
574 static void
_md5_free(void * module)575 _md5_free(void *module)
576 {
577     _md5_clear((PyObject *)module);
578 }
579 
580 /* Initialize this module. */
581 static int
md5_exec(PyObject * m)582 md5_exec(PyObject *m)
583 {
584     MD5State *st = md5_get_state(m);
585 
586     st->md5_type = (PyTypeObject *)PyType_FromModuleAndSpec(
587         m, &md5_type_spec, NULL);
588 
589     if (st->md5_type == NULL) {
590         return -1;
591     }
592 
593     Py_INCREF((PyObject *)st->md5_type);
594     if (PyModule_AddObject(m, "MD5Type", (PyObject *)st->md5_type) < 0) {
595          Py_DECREF(st->md5_type);
596         return -1;
597     }
598 
599     return 0;
600 }
601 
602 static PyModuleDef_Slot _md5_slots[] = {
603     {Py_mod_exec, md5_exec},
604     {0, NULL}
605 };
606 
607 
608 static struct PyModuleDef _md5module = {
609         PyModuleDef_HEAD_INIT,
610         .m_name = "_md5",
611         .m_size = sizeof(MD5State),
612         .m_methods = MD5_functions,
613         .m_slots = _md5_slots,
614         .m_traverse = _md5_traverse,
615         .m_clear = _md5_clear,
616         .m_free = _md5_free,
617 };
618 
619 PyMODINIT_FUNC
PyInit__md5(void)620 PyInit__md5(void)
621 {
622     return PyModuleDef_Init(&_md5module);
623 }
624