"""
User name to file name conversion.
This was taken from the UFO 3 spec.
"""

# Restrictions are taken mostly from
# https://docs.microsoft.com/en-gb/windows/win32/fileio/naming-a-file#naming-conventions.
#
# 1. Integer value zero, sometimes referred to as the ASCII NUL character.
# 2. Characters whose integer representations are in the range 1 to 31,
#    inclusive.
# 3. Various characters that (mostly) Windows and POSIX-y filesystems don't
#    allow, plus "(" and ")", as per the specification.
illegalCharacters = {chr(codePoint) for codePoint in range(0x20)} | {
    '"',
    "*",
    "+",
    "/",
    ":",
    "<",
    ">",
    "?",
    "[",
    "\\",
    "]",
    "(",
    ")",
    "|",
    "\x7f",
}
# Name parts (compared lower-cased) that get a leading "_" when they occur
# between periods in a user name.
reservedFileNames = {
    "aux",
    "clock$",
    "com1",
    "com2",
    "com3",
    "com4",
    "com5",
    "com6",
    "com7",
    "com8",
    "com9",
    "con",
    "lpt1",
    "lpt2",
    "lpt3",
    "lpt4",
    "lpt5",
    "lpt6",
    "lpt7",
    "lpt8",
    "lpt9",
    "nul",
    "prn",
}
# Length budget shared by prefix + converted name + suffix.
maxFileNameLength = 255


class NameTranslationError(Exception):
    """Raised when no unique file name can be produced."""


def userNameToFileName(userName: str, existing=(), prefix="", suffix=""):
    """
    Convert `userName` into a file name and return it as
    ``prefix + <converted name> + suffix``.

    The conversion replaces a leading period with "_" (only when no prefix
    is given), replaces every character in `illegalCharacters` with "_",
    appends "_" to every character that differs from its lower-case form,
    clips the name so that prefix + name + suffix fits in
    `maxFileNameLength`, and prepends "_" to every period-separated part
    that is a reserved file name. If the lower-cased result is found in
    `existing`, the name is made unique with `handleClash1`.

    `existing` should be a set-like object. It is tested against the
    lower-cased candidate, so its entries should be lower-case.

    An empty `userName` is accepted and yields ``prefix + suffix``.

    Raises ValueError if `userName` is not a string. NameTranslationError
    can propagate from the clash handlers.

    >>> userNameToFileName("a") == "a"
    True
    >>> userNameToFileName("A") == "A_"
    True
    >>> userNameToFileName("AE") == "A_E_"
    True
    >>> userNameToFileName("Ae") == "A_e"
    True
    >>> userNameToFileName("ae") == "ae"
    True
    >>> userNameToFileName("aE") == "aE_"
    True
    >>> userNameToFileName("a.alt") == "a.alt"
    True
    >>> userNameToFileName("A.alt") == "A_.alt"
    True
    >>> userNameToFileName("A.Alt") == "A_.A_lt"
    True
    >>> userNameToFileName("A.aLt") == "A_.aL_t"
    True
    >>> userNameToFileName(u"A.alT") == "A_.alT_"
    True
    >>> userNameToFileName("T_H") == "T__H_"
    True
    >>> userNameToFileName("T_h") == "T__h"
    True
    >>> userNameToFileName("t_h") == "t_h"
    True
    >>> userNameToFileName("F_F_I") == "F__F__I_"
    True
    >>> userNameToFileName("f_f_i") == "f_f_i"
    True
    >>> userNameToFileName("Aacute_V.swash") == "A_acute_V_.swash"
    True
    >>> userNameToFileName(".notdef") == "_notdef"
    True
    >>> userNameToFileName("con") == "_con"
    True
    >>> userNameToFileName("CON") == "C_O_N_"
    True
    >>> userNameToFileName("con.alt") == "_con.alt"
    True
    >>> userNameToFileName("alt.con") == "alt._con"
    True
    >>> userNameToFileName("") == ""
    True
    """
    # the incoming name must be a string
    if not isinstance(userName, str):
        raise ValueError("The value for userName must be a string.")
    # replace an initial period with an _ if no prefix is to be added
    # (startswith, unlike indexing [0], tolerates an empty name)
    if not prefix and userName.startswith("."):
        userName = "_" + userName[1:]
    # filter the user name
    filteredUserName = []
    for character in userName:
        if character in illegalCharacters:
            # replace illegal characters with _
            character = "_"
        elif character != character.lower():
            # add _ to all non-lower characters
            character += "_"
        filteredUserName.append(character)
    userName = "".join(filteredUserName)
    # clip to 255; clamp at zero so that an over-long prefix/suffix clips the
    # name to nothing instead of a negative slice trimming it from the end
    sliceLength = max(maxFileNameLength - len(prefix) - len(suffix), 0)
    userName = userName[:sliceLength]
    # test for illegal file names
    # NOTE: this runs after clipping, so every reserved part that gets a "_"
    # lengthens the result by one character beyond the clipped length.
    userName = ".".join(
        "_" + part if part.lower() in reservedFileNames else part
        for part in userName.split(".")
    )
    # test for clash
    fullName = prefix + userName + suffix
    if fullName.lower() in existing:
        fullName = handleClash1(userName, existing, prefix, suffix)
    # finished
    return fullName


def handleClash1(userName, existing=(), prefix="", suffix=""):
    """
    Make a clashing name unique by appending a 15-digit, zero-padded counter
    to `userName`, and return ``prefix + userName + counter + suffix``.

    existing should be a case-insensitive list
    of all existing file names.

    If appending the 15 digits would exceed `maxFileNameLength`, the excess
    is trimmed from the end of `userName` first. When no counter value
    yields a free name, the result of `handleClash2` is returned instead.

    >>> prefix = ("0" * 5) + "."
    >>> suffix = "." + ("0" * 10)
    >>> existing = ["a" * 5]

    >>> e = list(existing)
    >>> handleClash1(userName="A" * 5, existing=e,
    ...     prefix=prefix, suffix=suffix) == (
    ...     '00000.AAAAA000000000000001.0000000000')
    True

    >>> e = list(existing)
    >>> e.append(prefix + "aaaaa" + "1".zfill(15) + suffix)
    >>> handleClash1(userName="A" * 5, existing=e,
    ...     prefix=prefix, suffix=suffix) == (
    ...     '00000.AAAAA000000000000002.0000000000')
    True

    >>> e = list(existing)
    >>> e.append(prefix + "AAAAA" + "2".zfill(15) + suffix)
    >>> handleClash1(userName="A" * 5, existing=e,
    ...     prefix=prefix, suffix=suffix) == (
    ...     '00000.AAAAA000000000000001.0000000000')
    True
    """
    # if the prefix length + user name length + suffix length + 15 is past
    # the maximum length, slice the excess off of the end of the user name
    overshoot = len(prefix) + len(userName) + len(suffix) + 15 - maxFileNameLength
    if overshoot > 0:
        userName = userName[:-overshoot]
    # try to add numbers to create a unique name
    for counter in range(1, 999999999999999):
        fullName = prefix + userName + str(counter).zfill(15) + suffix
        if fullName.lower() not in existing:
            return fullName
    # every counter value clashed, go to the next fallback
    return handleClash2(existing, prefix, suffix)


def handleClash2(existing=(), prefix="", suffix=""):
    """
    Last-resort clash handler: return the first ``prefix + str(n) + suffix``
    (n counting up from 1) whose lower-cased form is not in `existing`.

    existing should be a case-insensitive list
    of all existing file names.

    Raises NameTranslationError if no free name is found, including when
    prefix and suffix leave no room for a number.

    >>> prefix = ("0" * 5) + "."
    >>> suffix = "." + ("0" * 10)
    >>> existing = [prefix + str(i) + suffix for i in range(100)]

    >>> e = list(existing)
    >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
    ...     '00000.100.0000000000')
    True

    >>> e = list(existing)
    >>> e.remove(prefix + "1" + suffix)
    >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
    ...     '00000.1.0000000000')
    True

    >>> e = list(existing)
    >>> e.remove(prefix + "2" + suffix)
    >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
    ...     '00000.2.0000000000')
    True
    """
    # calculate the longest possible string
    maxLength = maxFileNameLength - len(prefix) - len(suffix)
    if maxLength < 1:
        # no room for even one digit; int("") below would raise ValueError
        raise NameTranslationError("No unique name could be found.")
    maxValue = int("9" * maxLength)
    # try to find a number
    for counter in range(1, maxValue):
        fullName = prefix + str(counter) + suffix
        if fullName.lower() not in existing:
            return fullName
    # raise an error if nothing has been found
    raise NameTranslationError("No unique name could be found.")


if __name__ == "__main__":
    import doctest

    doctest.testmod()