#!usr/bin/env python # # Programmer: Craig Stuart Sapp # Creation Date: Mon Feb 1 21:31:24 PST 2010 # Last Modified: Mon Mar 1 16:08:56 PST 2010 # Filename: craigmdct.py # Syntax: Python 2.6 # """ MDCT Interface Functions for PAC system: MDCT(windowedsignal, leftoverlap, rightoverlap, isInverse=False) IMDCT(halfspectrum, leftoverlap, rightoverlap) Forward and inverse MDCT transform, with normalization of 1/N factor applied to forward MDCT rather than IMDCT. Implemented using the numpy.fft.fft/numpy.fft.ifft functions which utilize the "Fast Fourier Transform" algorithm of the DFT. MDCTslow(windowedsignal, leftoverlap, rightoverlap, isInverse=False) IMDCTslow(halfspectrum, leftoverlap, rightoverlap) Direct implementation of the MDCT with O(N**2) computational efficiency. Performs much slower as length of the transform is increased when compared the the above pair of functions. """ import numpy as np import numpy.fft as npf def MDCT(windowedsignal, a=-1, b=-1, isInverse=False): if (isInverse is False): return MDCTpositive (windowedsignal, a, b) else: return IMDCTpositive(windowedsignal, a, b) def IMDCT(halfspectrum, a=-1, b=-1): return IMDCTpositive(halfspectrum, a, b) def MDCTslow(windowedsignal, a=-1, b=-1, isInverse=False): if (isInverse is False): return np.split(DirectMDCT(windowedsignal, a, b), 2)[0] else: return IMDCTSlow(windowedsignal, a, b) def IMDCTslow(halfspect, a=-1, b=-1): fullspectrum = np.concatenate([halfspect, -np.flipud(halfspect)]) return DirectIMDCT(fullspectrum, a, b) ########################################################################### ############################## ## ## MDCTpositive -- MDCT implemented using numpy.fft.fft(). ## numpy.fft.fft() can actually receive data of any length, ## so no need to check for powers of two for the classical implementation ## of the Fast Fourier Transform. numpy.fft.fft() is "fast" when ## the prime factors of the length are about less than 11. For ## larger prime factors, the "direct" implementation of the Discrete ## Fourier Transform is used by numpy.fft.fft() ## ## Example input/output: ## MDCTpositive([0, 1, 2, 3, 4, 5, 6, 7]) ## array([-5.44915121, -0.87556153, 0.81515624, 0.61291387]) ## ## Asymmetric 50% overlapping case: ## MDCTpositive([0, 1, 2, 3, 4, 5], 2, 4) ## array([-3.99001577, 0.23570226, 0.72402944]) ## def MDCTpositive(input, a=-1, b=-1, normalize=True): """MDCTpositive: MDCT implemented with FFT, returning first 1/2 of spectrum array (positive frequencies). The MDCT includes the normalization factor rather than the IMDCT. Arguments: input: pre-windowed signal (using sine or kbd window). a: number of samples in overlap with previous 50% overlap frame. b: number of samples in overlap with next 50% overlap frame. Returns: The first half (positive) portion of antisymmetric spectrum. """ a = int(a) b = int(b) input = np.array(input) if (a<0 or b<0): N = len(input) a = N/2 b = N/2 else: N = a + b assert N == len(input) assert N%2 == 0 n0 = (b+1.0)/2.0 n = np.arange(N) y = input * np.e**(-1j*np.pi*n/N) Y = npf.fft(y) if (normalize is True): # Factor of 1/N is applied to place spectrum in range from -1.0 to +1.0; # otherwise the maximum spectral value is N (consider the transform # of npf.fft[1,1,1,1] which is [4,0,0,0], so dividing by 4 sets the # maximum to 1: npf.fft([1,1,1,1])/4 = [1,0,0,0] Y *= 1.0/N k = np.arange(N/2) # frequency bins of the fft up to the Nyquist freq. output = np.real(Y[k] * np.e**(-2j*np.pi*n0*(k+0.5)/N)) # Since the MDCT spectrum contains equal energy in the positive # and negative frequencies, add an extra factor of 2.0 to increase # the positive frequency values to full scale for peak amplitude # sinewaves. output *= 2.0 return output ############################## ## ## IMDCTpositive -- Inverse MDCT implemented using numpy.fft.ifft(). ## ## Example input/output: ## IMDCTpositive([0, 1, 2, 3]) ## array([ 3.8076084 , -3.6699737 , 3.6699737 , -3.8076084 , ## 7.64674316, -5.05576209, -5.05576209, 7.64674316]) ## ## Asymmetric 50% overlapping case: ## IMDCTpositive([0, 1, 2], 2, 4) ## array([ 2.44948974, -2.44948974, 4.24264069, ## -2.44948974, -2.44948974, 4.24264069]) ## def IMDCTpositive(input, a=-1, b=-1, normalize=False): """IMDCTpositive: IMDCT implemented with IFFT. Arguments: input: first 1/2 of anti-symmetric MDCT spectrum. a: number of samples in overlap with previous 50% overlap frame. b: number of samples in overlap with next 50% overlap frame. Returns: Full-length time-domain signal (which then needs to be windowed with a sine or KBD window) before 50% overlap-add. """ a = int(a) b = int(b) input = np.array(input) # reconstruct negative frequency portion of MDCT spectrum: input = np.concatenate([input, -np.flipud(input)]) if (a<0 or b<0): N = len(input) a = N/2 b = N/2 else: N = a + b assert N == len(input) assert N%2 == 0 n0 = (b+1.0)/2.0 k = np.arange(N) y = input * np.e**(1j*2*np.pi*k*n0/N) Y = npf.ifft(y) if (normalize is False): # The IFFT (for signal processing and as found in numpy) already contains # a 1/N normalization factor. However, IMDCTpositive already applied this # normalization factor, so remove the normalization from here. Y *= N n = np.arange(N) output = np.real(Y * np.e**(1j*np.pi*(n+n0)/N)) return output ######################################################################### ############################## ## ## DirectMDCT -- O(N**2) implementation of the MDCT. Outputs a full length-N ## spectrum for a length-N signal (unlike the MDCTpositive above which ## outputs an N/2 length spectrum from a length-N signal). ## ## Example inputs/outputs: ## ## DirectMDCT([0, 1, 2, 3, 4, 5, 6, 7]) ## array([-5.44915121, -0.87556153, 0.81515624, 0.61291387, ## -0.61291387, -0.81515624, 0.87556153, 5.44915121]) ## ## DirectMDCT([0, 1, 2, 3, 4, 5], 2, 4) ## array([-3.99001577, 0.23570226, 0.72402944, ## -0.72402944, -0.23570226, 3.99001577]) ## def DirectMDCT(input, a=-1, b=-2, normalize=True): """DirectMDCT: Calculate MDCT as two nested N-length summations Arguments: input: pre-windowed signal (using sine or kbd window) a: number of samples in overlap with previous 50% overlap frame. b: number of samples in overlap with next 50% overlap frame. Returns: The complete N-length anti-symmetric spectrum. """ input = np.array(input) a = int(a) b = int(b) if (a<0 or b<0): N = len(input) a = N/2 b = N/2 else: N = a + b assert N%2 == 0 assert N == len(input) n0 = (b + 1.0)/2.0 tpN = 2.0 * np.pi / N n = np.arange(N) output = np.zeros(N) for k in xrange(N): output[k] = np.sum(input * np.cos(tpN*(n+n0)*(k+0.5))) if (normalize is True): output *= 1.0/N # add extra 2.0 normalization factor so pos. freqs. match other MDCT amps. return 2.0 * output ############################## ## ## Jason Su's solution using maxtrix calculations. (positive spectrum output) ## ## Example function calls: ## ## DirectMDCTjs([0, 1, 2, 3, 4, 5, 6, 7], 4, 4) ## matrix([[-5.44915121], ## [-0.87556153], ## [ 0.81515624], ## [ 0.61291387]]) ## DirectMDCTjs([0, 1, 2, 3], 4, 4, True) # IMDCT ## matrix([[ 3.8076084 ], ## [-3.6699737 ], ## [ 3.6699737 ], ## [-3.8076084 ], ## [ 7.64674316], ## [-5.05576209], ## [-5.05576209], ## [ 7.64674316]]) ## def DirectMDCTjs(input, a=-1, b=-2, isInverse=False): N = a+b n0 = (b+1)/2.0 mat1 = (np.matrix(np.arange(N/2))+0.5).T # N/2x1 column vector mat2 = np.matrix(np.arange(N)) + n0 # 1xN row vector xfmmat = np.cos(2.0*np.pi/N*mat1*mat2) # build transform matrix # Add extra 2.0 factor to include negative energy xfmmat *= 2.0 if not isInverse: xfmmat *= 1.0/N else: xfmmat = xfmmat.T # make sure input a column vector input = np.matrix(input) if input.shape[0] is 1 and input.shape[1] is not 1: input = input.T return xfmmat*input ############################## ## ## DirectIMDCT -- O(N**2) implementation of the inverse MDCT. ## ## Example inputs/outputs: ## ## DirectIMDCT([0, 1, 2, 3, -3, -2, -1, 0]) ## array([ 3.8076084 , -3.6699737 , 3.6699737 , -3.8076084 , ## 7.64674316, -5.05576209, -5.05576209, 7.64674316]) ## ## DirectIMDCT([0, 1, 2, -2, -1, 0], 2, 4) ## array([ 2.44948974, -2.44948974, 4.24264069, ## -2.44948974, -2.44948974, 4.24264069]) ## def DirectIMDCT(input, a=-1, b=-1, normalize=False): """DirectIMDCT: IMDCT implemented with double summation. Arguments: input: Full-length anti-symmetric spectrum. a: number of samples in overlap with previous 50% overlap frame. b: number of samples in overlap with next 50% overlap frame. Returns: Full-length time-domain signal (which then needs to be windowed with a sine or KBD window). """ input = np.array(input) # if the input is only the first half of the spectrum, use this line: # input = np.concatenate([input, -np.flipud(input)]) a = int(a) b = int(b) if (a<0 or b<0): N = len(input) a = N/2 b = N/2 else: N = a + b assert N%2 == 0 assert N == len(input) n0 = (b + 1.0)/2.0 tpN = 2.0 * np.pi / N k = np.arange(N) output = np.zeros(N) for n in xrange(N): output[n] += np.sum(input * np.cos(tpN * (n+n0) * (k+0.5))) if (normalize is True): output *= 2.0 / N; return output ########################################################################### # # Testing code: # ### ### Various testing plots ### if __name__ == "__main__": """ # Verify that the TDAC process returns the original signal, within # machine precision. (HW 3, Question 1) #"" import numpy as np import numpy.fft as npf from craigwindow import * sigcase = 2 wincase = 2 if (sigcase == 0): N = 32 signal = np.ones(N) elif (sigcase == 1): N = 32 signal = np.sin(2.0 * np.pi * np.arange(N) * 1.0 / N) else: N = 8 signal = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] if (wincase == 0): window = SineWindow(N) elif (wincase == 1): window = KBDWindow(N, 1) elif (wincase == 2): window = np.ones(N)/np.sqrt(2) elif (wincase == 3): window = HanningWindow(N) else: window = np.ones(N) # Add 1/2 frame of zeros before and after signal. # Mod of signal length also needs to first be padded to 1/2 frame length. padding = np.zeros(N/2) paddedsignal = np.concatenate((padding, signal, padding)) segments = np.split(paddedsignal, len(signal)*2/N+2) # fcount: number of signal frames fcount = len(segments)-1; # frames, 50% overlapping frames of the original signal frames = [np.concatenate([segments[i], segments[i+1]]) \ for i in xrange(fcount)] # blocks = MDCT half-spectra of windowed signal frames blocks = [MDCT(frames[i] * window) for i in xrange(fcount)] # Bit optimization and quantization done here... # iframes = blocks IMDCTed and re-windowed iframes = np.array([IMDCT(blocks[i]) * window for i in xrange(fcount)]) # isegments = 1/2 length iframes, preparing for 50% overlap isegments = iframes.flatten() isegments = np.split(isegments, len(isegments)*2/N) # drop initial and final 1/2 frames which contain aliasing junk: isegments = isegments[1:-1] # isignal: 50% overlap summation to regenerate original signal. isignal = np.array( [ isegments[2*i] + isegments[2*i+1] \ for i in xrange(fcount-1) ] ).flatten() diff = np.abs(signal - isignal) max(diff) ### 3.5527136788005009e-15 # Using KBDWindow(N, 1): ### 2.886579864025407e-15 # Using 1/sqrt(2) window: ### 7.1054273576010019e-15 # examine reconstruction of [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]: isignal ### array([ 1.22124533e-15, 1.00000000e+00, 2.00000000e+00, ### 3.00000000e+00, 4.00000000e+00, 5.00000000e+00, ### 6.00000000e+00, 7.00000000e+00, 8.00000000e+00, ### 9.00000000e+00, 1.00000000e+01, 1.10000000e+01]) #"" # Verify that Fast and Slow MDCT implementations give the same values. #"" import random import numpy as np N = 1024 signal = [random.random() for i in xrange(N)] fastmdct = MDCT(signal) slowmdct = MDCTslow(signal) diff = np.abs(fastmdct - slowmdct) max(diff) ### 2.1427304375265521e-14 spectrum = [random.random() for i in xrange(N/2)] fastimdct = IMDCT(spectrum) slowimdct = IMDCTslow(spectrum) idiff = np.abs(fastimdct - slowimdct) max(idiff) ### 2.8705926524708048e-11 #"" # Examine Timing characteristics of numpy.fft.fft ###################### # Homework 3, Question 1 #"" import pylab as pl import numpy as np import numpy.fft as npf import time N = 5000 domain = np.arange(1,N+1) range = np.zeros(N) dummy = np.zeros(N) pow2i = [2**n-1 for n in xrange(1+int(np.log2(N)))] for i in xrange(N): time1 = time.clock() dummy = npf.fft(np.arange(domain[i])) time2 = time.clock() range[i] = time2 - time1 pl.clf() pl.plot(domain, range, 'b+') pl.plot(domain[pow2i], range[pow2i], 'rp') pl.xlabel('transform size') pl.ylabel('time [seconds]') pl.suptitle('numpy.fft transform length v calculation time') pl.show() #"" # Examining Frequency offset between FFT and MDCT ###################### #"" # Maximum possible signal: 0 Hz signal at amplitude of 1: # (which can be viewed as two 1/2 amplitude sinewaves at 0 Hz) import pylab as pl import numpy as np import numpy.fft as npf N = 1024 dcsig = np.ones(N) dcfft = npf.fft(dcsig) dcfftabs = np.abs(dcfft) max(dcfftabs) ### 1024.0 dcifft = npf.ifft(dcfft) max(dcifft) ### (1+0j) min(dcifft) ### (1+0j) # examine in frequency range with minimal positive/negative # frequency interaction (at bin N/4 in the middle of the positive # frequency range): signal = np.sin(2 * np.pi * (N/4.0) * np.arange(N) / N) # N/4 freq. absspectrum = np.abs(npf.fft(signal)) max(absspectrum)) / N ### 0.5 # check to see if the max is where it is expected: absspectrum[N/4] / N ### 0.5 absspectrum[N/4+1] / N # look in next higher bin ### 5.103003534765435e-15 absspectrum[N/4-1] / N # look in next lower bin ### 5.1030035347654397e-15 # Move the test frequency halfway between two bins signal = np.sin(2 * np.pi * (N/4.0+0.5) * np.arange(N) / N) # N/4 freq. absspectrum = np.abs(npf.fft(signal)) max(absspectrum) / N # 0.31831188357043555 # notice that max is split between two adjacent bins: absspectrum[N/4] / N ### 0.31830888749775132 absspectrum[N/4+1] / N ### 0.31831188357043544 # and also notice that max is surrounded by lower amplitude bins: absspectrum[N/4+2] / N ### 0.10610629152320546 absspectrum[N/4-1] / N ### 0.10610329539412655 # Move the test frequency to the next higher bin: signal = np.sin(2 * np.pi * (N/4.0+1) * np.arange(N) / N) # N/4 freq. absspectrum = np.abs(npf.fft(signal)) max(absspectrum) / N # 0.5 absspectrum[N/4+1] / N ### 0.5 absspectrum[N/4] / N ### 6.5506535623712632e-15 absspectrum[N/4+2] / N ### 6.664158709764428e-15 ## MDCT examining frequency offset by 0.5 bins: # Move the test frequency halfway between two FFT bins, which # is exactly on a frequency bin in the MDCT. signal = np.sin(2 * np.pi * (N/4.0+0.5) * np.arange(N) / N) absspectrum = np.abs(MDCT(signal)) max(absspectrum) # 0.99999882345170166 # check to see if max is where it is expected: absspectrum[N/4] ### 0.99999882345170166 # and notice that max is surronded by lower amplitude bins: absspectrum[N/4+1] ### 1.3437304749009987e-16 absspectrum[N/4-1] ### 8.3154533161029903e-16 # Move the test frequency up by 1/2 bin, so it is centered on FFT bin # N/4+1 but halfway between MDCT bins N/4 and N/4+1: signal = np.sin(2.0*np.pi*(N/4+1.0) * np.arange(N) / N) absspectrum = np.abs(MDCT(signal)) max(absspectrum) ### 0.63662002212358693 absspectrum[N/4-2] ### 0.00058593083592834024 absspectrum[N/4-1] ### 0.21220334506190164 absspectrum[N/4] ### 0.00097655445718997449 absspectrum[N/4+1] ### 0.63662002212358693 absspectrum[N/4+2] ### 0.0016276571495544786 #"" # Sweep a frequency past an FFT bin to examine continuous Fourier transform # in the region between bins. #" import pylab as pl import numpy as np N = 1024; window = HanningWindow(N) #window = np.ones(N) windowfactor = np.sum(window) / N output = np.zeros(N) domain = N/4.0 - 2.0 + (4.0 * np.arange(N) / N) - 256 for i in xrange(N): signal = np.sin(2*np.pi*(N/4.0-2.0 + 4.0*i/N)*np.arange(N)/N) absspectrum = np.abs(npf.fft(signal * window)) * 2 / N / windowfactor output[i] = absspectrum[N/4] pl.clf() pl.plot(domain, output) pl.show() # Sweep a frequency past an MDCT bin to examine continuous MDCT transform # in the region between bins. import pylab as pl import numpy as np N = 1024; window = SineWindow(N) #window = np.ones(N) windowfactor = np.sum(window) / N output = np.zeros(N); domain = N/4.0 - 2.0 + (4.0 * np.arange(N) / N) - 256 for i in xrange(N): signal = np.sin(2*np.pi*(N/4.0+0.5-2.0 + 4.0*i/N)*np.arange(N)/N) absspectrum = np.abs(MDCT(signal * window)) / windowfactor output[i] = absspectrum[N/4] pl.plot(domain, output) pl.show() #"" # Amplitude/Power scaling due to windowing within a fractional bin range. #" import pylab as pl import numpy as np import numpy.fft as npf N = 1024 from craigwindow import * sinwin = SineWindow(N) hanwin = HanningWindow(N) lowrange = np.zeros(N+1) lowrangesin = np.zeros(N+1) lowrangehan = np.zeros(N+1) for i in xrange(N+1): sig = np.sin(2 * np.pi * (2.0+1.0*i/N) * np.arange(N) / N) spec = npf.fft(sig) absspec = np.abs(spec) lowrange[i] = max(absspec) spec = npf.fft(sig*sinwin) absspec = np.abs(spec) lowrangesin[i] = max(absspec) spec = npf.fft(sig*hanwin) absspec = np.abs(spec) lowrangehan[i] = max(absspec) hirange = np.zeros(N+1) hirangesin = np.zeros(N+1) hirangehan = np.zeros(N+1) for i in xrange(N+1): sig = np.sin(2 * np.pi * (N/4+1.0*i/N) * np.arange(N) / N) spec = npf.fft(sig) absspec = np.abs(spec) hirange[i] = max(absspec) spec = npf.fft(sig*sinwin) absspec = np.abs(spec) hirangesin[i] = max(absspec) spec = npf.fft(sig*hanwin) absspec = np.abs(spec) hirangehan[i] = max(absspec) pl.clf() pl.plot(np.arange(N+1.0)/N,lowrange, np.arange(N+1.0)/N,hirange, np.arange(N+1.0)/N,lowrangesin, np.arange(N+1.0)/N,hirangesin, np.arange(N+1.0)/N,lowrangehan, np.arange(N+1.0)/N,hirangehan) pl.title("FFT") pl.xlabel("fractional bin of signal frequency") pl.ylabel("spectral bin maximum [amplitude]") pl.show() #"" # Examine effect of interaction between negative and positive components # in the MDCT. #"" import pylab as pl import numpy as np import numpy.fft as npf N = 1024 from craigwindow import * sinwin = SineWindow(N) hanwin = HanningWindow(N) lowrangesin = np.zeros(N+1) for i in xrange(N+1): sig = np.sin(2 * np.pi * (2.0+1.0*i/N) * np.arange(N) / N) spec = MDCT(sig*sinwin) absspec = np.abs(spec) lowrangesin[i] = max(absspec) hirangesin = np.zeros(N+1) for i in xrange(N+1): sig = np.sin(2 * np.pi * (N/4+1.0*i/N) * np.arange(N) / N) spec = MDCT(sig*sinwin) absspec = np.abs(spec) hirangesin[i] = max(absspec) pl.clf() pl.title("MDCT") pl.plot(np.arange(N+1.0)/N,lowrangesin, 'b-') pl.plot(np.arange(N+1.0)/N,hirangesin, 'r-') pl.xlabel("fractional bin of signal frequency") pl.ylabel("spectral bin maximum [amplitude]") pl.show() """