"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, OSError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr(). (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list. For directory
arguments it calls what() for all files in that directory. Default
argument is "." (testing all files in the current directory). The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""

import warnings

warnings._deprecated(__name__, remove=(3, 13))

# The file structure is top-down except that the test program and its
# subroutine come last.

__all__ = ['what', 'whathdr']

from collections import namedtuple

SndHeaders = namedtuple('SndHeaders',
                        'filetype framerate nchannels nframes sampwidth')

SndHeaders.filetype.__doc__ = ("""The value for type indicates the data type
and will be one of the strings 'aifc', 'aiff', 'au','hcom',
'sndr', 'sndt', 'voc', 'wav', '8svx', 'sb', 'ub', or 'ul'.""")
SndHeaders.framerate.__doc__ = ("""The sampling_rate will be either the actual
value or 0 if unknown or difficult to decode.""")
SndHeaders.nchannels.__doc__ = ("""The number of channels or 0 if it cannot be
determined or if the value is difficult to decode.""")
SndHeaders.nframes.__doc__ = ("""The value for frames will be either the number
of frames or -1.""")
SndHeaders.sampwidth.__doc__ = ("""Either the sample size in bits or
'A' for A-LAW or 'U' for u-LAW.""")


def what(filename):
    """Guess the type of a sound file.

    Thin wrapper around whathdr(): returns a SndHeaders tuple, or None
    when no header test recognizes the file.  Errors raised while opening
    or reading the file propagate to the caller.
    """
    res = whathdr(filename)
    return res


def whathdr(filename):
    """Recognize sound headers.

    Reads the first 512 bytes of *filename* and offers them, together with
    the still-open file object, to every function in ``tests`` in order.
    The first truthy result is wrapped in a SndHeaders tuple and returned;
    None is returned when no test matches.
    """
    with open(filename, 'rb') as f:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return SndHeaders(*res)
        return None


#-----------------------------------#
# Subroutines per sound header type #
#-----------------------------------#

# Each test takes the header bytes `h` (at most 512 bytes, possibly far
# fewer for a short file) and the open binary file `f`.  It returns a
# 5-tuple in SndHeaders field order, or None when the header is not of its
# type or is too short to decode.

tests = []


def test_aifc(h, f):
    """AIFC and AIFF files"""
    if not h.startswith(b'FORM'):
        return None
    if h[8:12] == b'AIFC':
        fmt = 'aifc'
    elif h[8:12] == b'AIFF':
        fmt = 'aiff'
    else:
        return None
    # Import only after the magic matched, so that probing other file types
    # does not pay for (or depend on) the aifc module.  Its import-time
    # DeprecationWarning is silenced.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=DeprecationWarning)
        import aifc
    f.seek(0)
    try:
        a = aifc.open(f, 'r')
    except (EOFError, aifc.Error):
        return None
    return (fmt, a.getframerate(), a.getnchannels(),
            a.getnframes(), 8 * a.getsampwidth())

tests.append(test_aifc)


# AU encoding code -> (reported sample width, bytes per sample).
# Codes follow the Sun/NeXT AU header definition.
_AU_ENCODINGS = {
    1: ('U', 1),     # 8-bit mu-law
    2: (8, 1),       # 8-bit linear PCM
    3: (16, 2),      # 16-bit linear PCM
    4: (24, 3),      # 24-bit linear PCM
    5: (32, 4),      # 32-bit linear PCM
    27: ('A', 1),    # 8-bit A-law
}

# Data-size value an AU writer stores when the length is not known.
_AU_UNKNOWN_SIZE = 0xFFFFFFFF


def test_au(h, f):
    """AU and SND files"""
    if h.startswith(b'.snd'):
        func = get_long_be
    elif h[:4] in (b'\0ds.', b'dns.'):
        func = get_long_le
    else:
        return None
    if len(h) < 24:
        # Magic present but the fixed-size header fields are cut off.
        return None
    filetype = 'au'
    data_size = func(h[8:12])
    encoding = func(h[12:16])
    rate = func(h[16:20])
    nchannels = func(h[20:24])
    # Unrecognized encodings keep the historical '?' / one-byte fallback.
    sample_bits, sample_size = _AU_ENCODINGS.get(encoding, ('?', 1))
    frame_size = sample_size * nchannels
    if frame_size and data_size != _AU_UNKNOWN_SIZE:
        # Floor division: a frame count is an integer.
        nframe = data_size // frame_size
    else:
        nframe = -1
    return filetype, rate, nchannels, nframe, sample_bits

tests.append(test_au)


def test_hcom(h, f):
    """HCOM file"""
    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
        return None
    if len(h) < 148:
        # The divisor field at bytes 144..147 is missing.
        return None
    divisor = get_long_be(h[144:148])
    if divisor:
        rate = 22050 // divisor     # integer rate, not a float
    else:
        rate = 0
    return 'hcom', rate, 1, -1, 8

tests.append(test_hcom)


def test_voc(h, f):
    """VOC file"""
    if not h.startswith(b'Creative Voice File\032'):
        return None
    if len(h) < 22:
        # The 16-bit offset field at bytes 20..21 is missing.
        return None
    sbseek = get_short_le(h[20:22])
    rate = 0
    # Only look at the first block when both its type byte and its rate
    # byte lie inside the bytes that were actually read.
    if 0 <= sbseek < 500 and sbseek + 4 < len(h) and h[sbseek] == 1:
        ratecode = 256 - h[sbseek+4]
        if ratecode:
            rate = int(1000000.0 / ratecode)
    return 'voc', rate, 1, -1, 8

tests.append(test_voc)


def test_wav(h, f):
    """WAV file"""
    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
    if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
        return None
    import wave
    f.seek(0)
    try:
        w = wave.open(f, 'r')
    except (EOFError, wave.Error):
        return None
    return ('wav', w.getframerate(), w.getnchannels(),
            w.getnframes(), 8*w.getsampwidth())

tests.append(test_wav)


def test_8svx(h, f):
    """8SVX file"""
    if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
        return None
    # Should decode it to get #channels -- assume always 1
    # NOTE(review): nframes is reported as 0 here although the other tests
    # use -1 for "unknown"; left unchanged for backward compatibility.
    return '8svx', 0, 1, 0, 8

tests.append(test_8svx)


def test_sndt(h, f):
    """SNDT file"""
    if not h.startswith(b'SOUND'):
        return None
    if len(h) < 22:
        # Sample count (bytes 8..11) or rate (bytes 20..21) is cut off.
        return None
    nsamples = get_long_le(h[8:12])
    rate = get_short_le(h[20:22])
    return 'sndt', rate, 1, nsamples, 8

tests.append(test_sndt)


def test_sndr(h, f):
    """SNDR file"""
    if not h.startswith(b'\0\0'):
        return None
    if len(h) < 4:
        # The rate field at bytes 2..3 is missing.
        return None
    rate = get_short_le(h[2:4])
    if 4000 <= rate <= 25000:
        return 'sndr', rate, 1, -1, 8
    return None

tests.append(test_sndr)


#-------------------------------------------#
# Subroutines to extract numbers from bytes #
#-------------------------------------------#

def get_long_be(b):
    """Return the first four bytes of b as an unsigned big-endian integer."""
    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]

def get_long_le(b):
    """Return the first four bytes of b as an unsigned little-endian integer."""
    return (b[3] << 24) | (b[2] << 16) | (b[1] << 8) | b[0]

def get_short_be(b):
    """Return the first two bytes of b as an unsigned big-endian integer."""
    return (b[0] << 8) | b[1]

def get_short_le(b):
    """Return the first two bytes of b as an unsigned little-endian integer."""
    return (b[1] << 8) | b[0]


#--------------------#
# Small test program #
#--------------------#

def test():
    """Command-line driver: run what() on every path in sys.argv.

    A leading '-r' argument is removed from sys.argv and enables recursion
    into nested directories.  With no paths, the current directory is
    scanned.  Ctrl-C prints a notice and exits with status 1.
    """
    import sys
    recursive = 0
    if sys.argv[1:] and sys.argv[1] == '-r':
        del sys.argv[1:2]
        recursive = 1
    try:
        if sys.argv[1:]:
            testall(sys.argv[1:], recursive, 1)
        else:
            testall(['.'], recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)

def testall(list, recursive, toplevel):
    """Print the what() result for each path in *list*.

    Directories are expanded when *recursive* or *toplevel* is true and
    otherwise only reported.  Files that raise OSError are reported as
    not found.  (The parameter name shadows the builtin but is kept so
    existing keyword callers continue to work.)
    """
    import sys
    import os
    for filename in list:
        if os.path.isdir(filename):
            print(filename + '/:', end=' ')
            if recursive or toplevel:
                print('recursing down:')
                import glob
                names = glob.glob(os.path.join(glob.escape(filename), '*'))
                testall(names, recursive, 0)
            else:
                print('*** directory (use -r) ***')
        else:
            print(filename + ':', end=' ')
            sys.stdout.flush()
            try:
                print(what(filename))
            except OSError:
                print('*** not found ***')

if __name__ == '__main__':
    test()