"""Routines to help recognize sound files.
2
3Function whathdr() recognizes various types of sound file headers.
4It understands almost all headers that SOX can decode.
5
6The return tuple contains the following items, in this order:
7- file type (as SOX understands it)
8- sampling rate (0 if unknown or hard to decode)
9- number of channels (0 if unknown or hard to decode)
10- number of frames in the file (-1 if unknown or hard to decode)
11- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
12
13If the file doesn't have a recognizable type, it returns None.
14If the file can't be opened, OSError is raised.
15
16To compute the total time, divide the number of frames by the
17sampling rate (a frame contains a sample for each channel).
18
19Function what() calls whathdr().  (It used to also use some
20heuristics for raw data, but this doesn't work very well.)
21
22Finally, the function test() is a simple main program that calls
23what() for all files mentioned on the argument list.  For directory
24arguments it calls what() for all files in that directory.  Default
25argument is "." (testing all files in the current directory).  The
26option -r tells it to recurse down directories found inside
27explicitly given directories.
28"""
29
30import warnings
31
# Announce at import time that this module is deprecated; remove=(3, 13)
# names the Python release in which it is scheduled to go away.
warnings._deprecated(__name__, remove=(3, 13))

# The file structure is top-down except that the test program and its
# subroutine come last.

# Names exported by a star-import of this module.
__all__ = ['what', 'whathdr']
38
39from collections import namedtuple
40
# Record type returned by what()/whathdr().  The field order matches the
# plain tuples produced by the per-format test_* probes.
SndHeaders = namedtuple(
    'SndHeaders',
    ['filetype', 'framerate', 'nchannels', 'nframes', 'sampwidth'],
)

# Per-field documentation strings.
SndHeaders.filetype.__doc__ = (
    "The value for type indicates the data type\n"
    "and will be one of the strings 'aifc', 'aiff', 'au','hcom',\n"
    "'sndr', 'sndt', 'voc', 'wav', '8svx', 'sb', 'ub', or 'ul'."
)
SndHeaders.framerate.__doc__ = (
    "The sampling_rate will be either the actual\n"
    "value or 0 if unknown or difficult to decode."
)
SndHeaders.nchannels.__doc__ = (
    "The number of channels or 0 if it cannot be\n"
    "determined or if the value is difficult to decode."
)
SndHeaders.nframes.__doc__ = (
    "The value for frames will be either the number\n"
    "of frames or -1."
)
SndHeaders.sampwidth.__doc__ = (
    "Either the sample size in bits or\n"
    "'A' for A-LAW or 'U' for u-LAW."
)
55
def what(filename):
    """Guess the type of a sound file.

    Thin wrapper around whathdr(): hands back its SndHeaders result, or
    None when no known header is recognized.  OSError propagates if the
    file cannot be opened.
    """
    return whathdr(filename)
60
61
def whathdr(filename):
    """Recognize sound headers.

    Reads the first 512 bytes of *filename* and offers them, together with
    the open binary file object, to each probe in ``tests`` in list order.
    The first truthy probe result is wrapped in a SndHeaders tuple; None is
    returned when every probe declines.  OSError propagates if the file
    cannot be opened.
    """
    with open(filename, 'rb') as stream:
        header = stream.read(512)
        for probe in tests:
            fields = probe(header, stream)
            if not fields:
                continue
            return SndHeaders(*fields)
    return None
71
72
73#-----------------------------------#
74# Subroutines per sound header type #
75#-----------------------------------#
76
# Registry of header probes.  whathdr() calls them in list order and stops at
# the first one that returns a truthy result, so registration order is the
# matching precedence.  Each probe is appended right after its definition.
tests = []
78
79def test_aifc(h, f):
80    """AIFC and AIFF files"""
81    with warnings.catch_warnings():
82        warnings.simplefilter('ignore', category=DeprecationWarning)
83        import aifc
84    if not h.startswith(b'FORM'):
85        return None
86    if h[8:12] == b'AIFC':
87        fmt = 'aifc'
88    elif h[8:12] == b'AIFF':
89        fmt = 'aiff'
90    else:
91        return None
92    f.seek(0)
93    try:
94        a = aifc.open(f, 'r')
95    except (EOFError, aifc.Error):
96        return None
97    return (fmt, a.getframerate(), a.getnchannels(),
98            a.getnframes(), 8 * a.getsampwidth())
99
100tests.append(test_aifc)
101
102
103def test_au(h, f):
104    """AU and SND files"""
105    if h.startswith(b'.snd'):
106        func = get_long_be
107    elif h[:4] in (b'\0ds.', b'dns.'):
108        func = get_long_le
109    else:
110        return None
111    filetype = 'au'
112    hdr_size = func(h[4:8])
113    data_size = func(h[8:12])
114    encoding = func(h[12:16])
115    rate = func(h[16:20])
116    nchannels = func(h[20:24])
117    sample_size = 1 # default
118    if encoding == 1:
119        sample_bits = 'U'
120    elif encoding == 2:
121        sample_bits = 8
122    elif encoding == 3:
123        sample_bits = 16
124        sample_size = 2
125    else:
126        sample_bits = '?'
127    frame_size = sample_size * nchannels
128    if frame_size:
129        nframe = data_size / frame_size
130    else:
131        nframe = -1
132    return filetype, rate, nchannels, nframe, sample_bits
133
134tests.append(test_au)
135
136
137def test_hcom(h, f):
138    """HCOM file"""
139    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
140        return None
141    divisor = get_long_be(h[144:148])
142    if divisor:
143        rate = 22050 / divisor
144    else:
145        rate = 0
146    return 'hcom', rate, 1, -1, 8
147
148tests.append(test_hcom)
149
150
151def test_voc(h, f):
152    """VOC file"""
153    if not h.startswith(b'Creative Voice File\032'):
154        return None
155    sbseek = get_short_le(h[20:22])
156    rate = 0
157    if 0 <= sbseek < 500 and h[sbseek] == 1:
158        ratecode = 256 - h[sbseek+4]
159        if ratecode:
160            rate = int(1000000.0 / ratecode)
161    return 'voc', rate, 1, -1, 8
162
163tests.append(test_voc)
164
165
166def test_wav(h, f):
167    """WAV file"""
168    import wave
169    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
170    if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
171        return None
172    f.seek(0)
173    try:
174        w = wave.open(f, 'r')
175    except (EOFError, wave.Error):
176        return None
177    return ('wav', w.getframerate(), w.getnchannels(),
178                   w.getnframes(), 8*w.getsampwidth())
179
180tests.append(test_wav)
181
182
183def test_8svx(h, f):
184    """8SVX file"""
185    if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
186        return None
187    # Should decode it to get #channels -- assume always 1
188    return '8svx', 0, 1, 0, 8
189
190tests.append(test_8svx)
191
192
193def test_sndt(h, f):
194    """SNDT file"""
195    if h.startswith(b'SOUND'):
196        nsamples = get_long_le(h[8:12])
197        rate = get_short_le(h[20:22])
198        return 'sndt', rate, 1, nsamples, 8
199
200tests.append(test_sndt)
201
202
203def test_sndr(h, f):
204    """SNDR file"""
205    if h.startswith(b'\0\0'):
206        rate = get_short_le(h[2:4])
207        if 4000 <= rate <= 25000:
208            return 'sndr', rate, 1, -1, 8
209
210tests.append(test_sndr)
211
212
213#-------------------------------------------#
214# Subroutines to extract numbers from bytes #
215#-------------------------------------------#
216
def get_long_be(b):
    """Return the first four items of *b* combined as a big-endian integer."""
    value = 0
    for index in (0, 1, 2, 3):
        value = (value << 8) | b[index]
    return value
219
def get_long_le(b):
    """Return the first four items of *b* combined as a little-endian integer."""
    value = 0
    # Walk from the most significant byte (index 3) down to index 0.
    for index in (3, 2, 1, 0):
        value = (value << 8) | b[index]
    return value
222
def get_short_be(b):
    """Return the first two items of *b* combined as a big-endian integer."""
    value = 0
    for index in (0, 1):
        value = (value << 8) | b[index]
    return value
225
def get_short_le(b):
    """Return the first two items of *b* combined as a little-endian integer."""
    value = 0
    # High byte (index 1) first, then the low byte.
    for index in (1, 0):
        value = (value << 8) | b[index]
    return value
228
229
230#--------------------#
231# Small test program #
232#--------------------#
233
def test():
    """Command-line driver: run testall() on the paths given in sys.argv.

    A leading ``-r`` option is removed from sys.argv and enables recursion
    into subdirectories.  With no path arguments the current directory is
    examined.  Ctrl-C prints a short notice to stderr and exits with
    status 1.
    """
    import sys
    recursive = 0
    if len(sys.argv) > 1 and sys.argv[1] == '-r':
        # Drop the option so only path arguments remain.
        del sys.argv[1:2]
        recursive = 1
    try:
        targets = sys.argv[1:]
        if not targets:
            targets = ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
248
249def testall(list, recursive, toplevel):
250    import sys
251    import os
252    for filename in list:
253        if os.path.isdir(filename):
254            print(filename + '/:', end=' ')
255            if recursive or toplevel:
256                print('recursing down:')
257                import glob
258                names = glob.glob(os.path.join(glob.escape(filename), '*'))
259                testall(names, recursive, 0)
260            else:
261                print('*** directory (use -r) ***')
262        else:
263            print(filename + ':', end=' ')
264            sys.stdout.flush()
265            try:
266                print(what(filename))
267            except OSError:
268                print('*** not found ***')
269
# Run the command-line test driver when this file is executed as a script.
if __name__ == '__main__':
    test()
272