xref: /aosp_15_r20/external/libopus/dnn/torch/neural-pitch/utils.py (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li"""
2*a58d3d2aSXin LiUtility functions that are commonly used
3*a58d3d2aSXin Li"""
4*a58d3d2aSXin Li
5*a58d3d2aSXin Liimport numpy as np
6*a58d3d2aSXin Lifrom scipy.signal import windows, lfilter
7*a58d3d2aSXin Lifrom prettytable import PrettyTable
8*a58d3d2aSXin Li
9*a58d3d2aSXin Li
10*a58d3d2aSXin Li# Source: https://gist.github.com/thongonary/026210fc186eb5056f2b6f1ca362d912
def count_parameters(model):
    """Print a per-module table of trainable parameter counts and return the total.

    Modules with requires_grad == False are skipped entirely.
    """
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for module_name, tensor in model.named_parameters():
        # Frozen parameters do not contribute to the trainable total.
        if not tensor.requires_grad:
            continue
        n_params = tensor.numel()
        table.add_row([module_name, n_params])
        total_params += n_params
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params
22*a58d3d2aSXin Li
def stft(x, w = 'boxcar', N = 320, H = 160):
    """Short-time Fourier transform of 1-D signal x.

    Parameters
    ----------
    x : array_like
        Input signal; zero-padded by N samples at the end so the tail is covered.
    w : str
        Window name understood by scipy.signal.windows.get_window.
    N : int
        Frame (FFT) size in samples.
    H : int
        Hop size in samples.

    Returns
    -------
    np.ndarray of shape (num_frames, N//2 + 1), complex rfft of each windowed frame.
    """
    x = np.concatenate([x, np.zeros(N)])
    # Hoist the window out of the frame loop: get_window(w, N) is
    # loop-invariant, so computing it once avoids rebuilding it per frame.
    win = windows.get_window(w, N)
    return np.stack([np.fft.rfft(x[i:i + N] * win)
                     for i in np.arange(0, x.shape[0] - N, H)])
27*a58d3d2aSXin Li
def random_filter(x):
    """Apply a random second-order IIR filter to x.

    The four free coefficients (two numerator, two denominator) are drawn
    uniformly from [-3/8, 3/8]; the leading coefficient of each polynomial
    is fixed at 1.
    """
    b1, b2, a1, a2 = np.random.uniform(low=-3.0 / 8, high=3.0 / 8, size=4)
    return lfilter([1, b1, b2], [1, a1, a2], x)
34*a58d3d2aSXin Li
def feature_xform(feature):
    """Stack multi-resolution copies of LPCNet xcorr features.

    For an (N, lags) xcorr feature matrix:
      1. Upsample the lag axis by 2, smooth with a [0.25, 0.5, 0.25] FIR,
         then truncate back to the original lag count.
      2. Downsample the lag axis by 2, smooth with a [0.5, 0.5] FIR,
         then zero-pad back to the original lag count.
      3. Stack (downsampled, original, upsampled) as a trailing axis.

    Returns an array of shape (N, lags, 3).
    """
    from scipy.signal import resample_poly, lfilter

    n_frames, n_lags = feature.shape[0], feature.shape[1]

    # Upsampled branch: 2x along the lag axis, smoothed, cut to n_lags.
    upsampled = resample_poly(feature, 2, 1, axis=1)
    feature_US = lfilter([0.25, 0.5, 0.25], [1], upsampled, axis=1)[:, :n_lags]

    # Downsampled branch: 1/2x along the lag axis, smoothed, zero-padded
    # on the right so every branch has the same lag count.
    downsampled = resample_poly(feature, 1, 2, axis=1)
    feature_DS = lfilter([0.5, 0.5], [1], downsampled, axis=1)
    pad = np.zeros((n_frames, n_lags - feature_DS.shape[1]))
    feature_DS = np.concatenate([feature_DS, pad], axis=-1)

    return np.stack((feature_DS, feature, feature_US), axis=-1)
60