battery is at 45%!

0541

i fixed a mistake and two more assertions are passing.
it's paused on the failing one:

269         rfreqs15t = fftfreq(repetition_samples=15, complex=False)
270         rfreqs15f = fftfreq(15, complex=False)
271         irft_from_15 = create_freq2time(freqs=rfreqs15f)
272         rft_from_15 = create_time2freq(15, freqs=rfreqs15t)
273         irft_to_15 = create_freq2time(15, freqs=rfreqs15t)
274         randvec2rtime15 = randvec[:15].astype(np.complex128).view(float) @ irft_from_15
275         randvec2rfreq15 = randvec[:15] @ rft_from_15
276         randvec2irfft = np.fft.irfft(randvec[:15])
277         randvec2rfft = np.fft.rfft(randvec[:15])
278         randvec2rfreq152randvec = randvec2rfreq15.view(float) @ irft_to_15
279         randvec2rfreq152irfft = np.fft.irfft(randvec2rfreq15, 15)
280         assert np.allclose(rfreqs15t, np.fft.rfftfreq(15))
281         assert np.allclose(rfreqs15f, np.fft.rfftfreq(28))
282         assert np.allclose(randvec2rtime15, randvec2irfft)
283         assert np.allclose(randvec2rfreq15, randvec2rfft)
284  ->     assert np.allclose(randvec2rfreq152randvec, randvec2rfreq152irfft)
285         assert np.allclose(randvec2rfreq152randvec, randvec[:15])

The failing value is likely the time-domain reconstruction from complex frequencies. my plan was to break in the matrix generation and run test data through the steps to see where it first differed.

134             complex_freqs = np.concatenate([
135                 freqs[:tail_idx],
136                 -freqs[tail_idx:],
137                 -freqs[head_idx:tail_idx][::-1]
138             ])
139             complex_mat = complex_wavelet(wavelet, np.outer(complex_freqs, offsets))

162             interim_mat = complex_mat.copy()
163             interim_mat[head_idx:tail_idx] += interim_mat[len(freqs):][::-1]
164             mat = np.stack([
165                 interim_mat[:len(freqs)].real.T,
166                 -interim_mat[:len(freqs)].imag.T
167             ], axis=-1).reshape(len(complex_freqs), len(freqs)*2).T

185         return mat / freq_count # scaled to match numpy convention

note to self: i believe the numpy help said you can change when that / freq_count normalization happens.
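checking that note to self: numpy's fft functions do take a norm argument that moves the 1/n scaling around, which is what i remember. a quick sketch:

```python
import numpy as np

x = np.random.default_rng(0).random(8)
# the default norm ("backward") puts the 1/n scaling on the inverse transform;
# norm="forward" moves it onto the forward transform instead
assert np.allclose(np.fft.fft(x, norm="forward"), np.fft.fft(x) / len(x))
# norm="ortho" splits it as 1/sqrt(n) on both directions
assert np.allclose(np.fft.fft(x, norm="ortho"), np.fft.fft(x) / np.sqrt(len(x)))
```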

(Pdb) break 162
Breakpoint 1 at /shared/src/scratch/fourier.py:162

-> interim_mat = complex_mat.copy()

(Pdb) time_data = np.random.random(complex_mat.shape[-1])
(Pdb) complex_freq_data = time_data @ np.linalg.inv(complex_mat)
(Pdb) real_freq_data = np.concatenate([complex_freq_data[:tail_idx], complex_freq_data[tail_idx:len(freqs)].conj()])

...

(Pdb) p complex_freq_data @ complex_mat
array([0.0202184 +1.38777878e-16j, 0.83261985-2.77555756e-17j,
       0.77815675+2.77555756e-16j, 0.87001215-1.38777878e-16j,
       0.97861834-1.24900090e-16j, 0.79915856-1.94289029e-16j,
       0.46147936-2.22044605e-16j, 0.78052918-1.80411242e-16j,
       0.11827443-1.04083409e-16j, 0.63992102+3.60822483e-16j,
       0.14335329-1.04083409e-16j, 0.94466892-1.87350135e-16j,
       0.52184832+2.74086309e-16j, 0.41466194+0.00000000e+00j,
       0.26455561-1.73472348e-17j, 0.77423369+5.55111512e-17j,
       0.45615033-5.13478149e-16j, 0.56843395+2.77555756e-16j,
       0.0187898 -4.16333634e-17j, 0.6176355 -1.66533454e-16j,
       0.61209572+1.66533454e-16j, 0.616934  -3.33066907e-16j,
       0.94374808+6.24500451e-17j, 0.6818203 +2.84494650e-16j,
       0.3595079 +2.63677968e-16j, 0.43703195+3.60822483e-16j,
       0.6976312 -2.70616862e-16j, 0.06022547+3.60822483e-16j])
(Pdb) p real_freq_data.view(float) @ mat
array([0.0202184 , 0.44642266, 0.73789397, 0.65352205, 0.66906312,
       0.74048943, 0.70261372, 0.69873159, 0.36518507, 0.62877826,
       0.08107154, 0.75655143, 0.48899933, 0.59444781, 0.26455561,
       0.59444781, 0.48899933, 0.75655143, 0.08107154, 0.62877826,
       0.36518507, 0.69873159, 0.70261372, 0.74048943, 0.66906312,
       0.65352205, 0.73789397, 0.44642266])

i'll pick sample index 6 to debug again i think.

(Pdb) p (complex_freq_data @ complex_mat)[6]
(0.4614793622529322-2.220446049250313e-16j)
(Pdb) p (real_freq_data.view(float) @ mat)[6]
0.7026137203837779

(Pdb) p complex_freq_data @ complex_mat[:,6]
(0.4614793622529323-2.6020852139652106e-16j)
(Pdb) p real_freq_data.view(float) @ mat[:,6]
0.7026137203837779

I tried the nyquist frequency, and it looks like it's working right:
(Pdb) p complex_freq_data[tail_idx] * complex_mat[tail_idx,6]
(-0.09512353353755763-2.275865808140467e-17j)
(Pdb) p real_freq_data.view(float)[tail_idx*2:] @ mat[tail_idx*2:,6]
-0.09512353353755763

so I guess the problem is in combining the conjugates in the interim matrix.

(Pdb) p complex_mat[:,6][head_idx:tail_idx].round(3)
array([ 0.223+0.975j, -0.901+0.434j, -0.623-0.782j,  0.623-0.782j,
        0.901+0.434j, -0.223+0.975j, -1.   +0.j   , -0.223-0.975j,
        0.901-0.434j,  0.623+0.782j, -0.623+0.782j, -0.901-0.434j,
        0.223-0.975j])
(Pdb) p complex_mat[:,6][len(freqs):][::-1].round(3)
array([ 0.223-0.975j, -0.901-0.434j, -0.623+0.782j,  0.623+0.782j,
        0.901-0.434j, -0.223-0.975j, -1.   +0.j   , -0.223+0.975j,
        0.901+0.434j,  0.623-0.782j, -0.623-0.782j, -0.901+0.434j,
        0.223+0.975j])

ok, yeah: since the mirrored rows are conjugates, i lose information if i just sum them in complex space. i need to negate the imaginary parts of the conjugate rows before adding them.
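a scalar sketch of the information loss, with made-up values (b stands in for one basis sample of the +f row; the mirrored −f row holds its conjugate):

```python
import numpy as np

z = 0.7 + 0.4j                # one positive-frequency coefficient (made up)
b = np.exp(2j * np.pi * 0.3)  # basis value at some sample for +f (made up)
b_mirror = np.conj(b)         # the mirrored row holds the -f basis value
# the true contribution of the conjugate pair (z, conj(z)) is purely real:
true_pair = (z * b + np.conj(z) * b_mirror).real
# naively summing the rows in complex space yields 2*b.real, dropping b's imaginary part:
buggy_row = b + b_mirror
# summing with the conjugate yields 2*b, keeping both parts:
fixed_row = b + np.conj(b_mirror)
assert np.isclose((z * fixed_row).real, true_pair)
assert not np.isclose((z * buggy_row).real, true_pair)
```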
0556

        interim_mat = complex_mat.copy()
        interim_mat[head_idx:tail_idx].real += interim_mat[len(freqs):][::-1].real
        interim_mat[head_idx:tail_idx].imag -= interim_mat[len(freqs):][::-1].imag
        mat = np.stack([
            interim_mat[:len(freqs)].real.T,
            -interim_mat[:len(freqs)].imag.T
        ], axis=-1).reshape(len(complex_freqs), len(freqs)*2).T

0600
        interim_mat = complex_mat.copy()
        interim_mat[head_idx:tail_idx] += interim_mat[len(freqs):][::-1].conj()
        mat = np.stack([
            interim_mat[:len(freqs)].real.T,
            -interim_mat[:len(freqs)].imag.T
        ], axis=-1).reshape(len(complex_freqs), len(freqs)*2).T

0603
the same assertion is still failing. i figured i had applied the wrong fix, or applied it poorly, but sample 6 is matching now:
(Pdb) p complex_freq_data[:] @ complex_mat[:,6]
(0.4614793622529323-2.6020852139652106e-16j)
(Pdb) p real_freq_data.view(float)[:] @ mat[:,6]
0.46147936225293196

looks like they all are now, which is good:
(Pdb) p (complex_freq_data @ complex_mat).round(3)
array([0.02 +0.j, 0.833-0.j, 0.778+0.j, 0.87 -0.j, 0.979-0.j, 0.799-0.j,
       0.461-0.j, 0.781-0.j, 0.118-0.j, 0.64 +0.j, 0.143-0.j, 0.945-0.j,
       0.522+0.j, 0.415+0.j, 0.265-0.j, 0.774+0.j, 0.456-0.j, 0.568+0.j,
       0.019-0.j, 0.618-0.j, 0.612+0.j, 0.617-0.j, 0.944+0.j, 0.682+0.j,
       0.36 +0.j, 0.437+0.j, 0.698-0.j, 0.06 +0.j])
(Pdb) p (real_freq_data.view(float) @ mat).round(3)
array([0.02 , 0.833, 0.778, 0.87 , 0.979, 0.799, 0.461, 0.781, 0.118,
       0.64 , 0.143, 0.945, 0.522, 0.415, 0.265, 0.774, 0.456, 0.568,
       0.019, 0.618, 0.612, 0.617, 0.944, 0.682, 0.36 , 0.437, 0.698,
       0.06 ])

i tested in the generation function that both constructions made the same output as the complex matrix, and they appeared to.
these two values are supposed to be the same but are differing:
278         randvec2rfreq152randvec = randvec2rfreq15.view(float) @ irft_to_15
279         randvec2rfreq152irfft = np.fft.irfft(randvec2rfreq15, 15)

i've bumped into a couple different likely mistakes that could cause
that difference.
0608
0621

summary of failure:
vector.view(float) @ create_freq2time(15, freqs=fftfreq(repetition_samples=15, complex=False))
is producing different output than
np.fft.irfft(vector, 15)

- there is no nyquist frequency
- the real frequencies used are rfreqs15t
- the test vector is randvec2rfreq15
- the failing matrix is constructed as irft_to_15 on line 273

so, inside the call on line 273, i should expect the final matrix to
have the same effect as np.fft.irfft(_, 15)
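before diving back in, a reference identity built purely out of numpy (none of my code) that the final matrix needs to reproduce: irfft of a half-spectrum equals the full conjugate-symmetric spectrum pushed through a plain DFT synthesis matrix.

```python
import numpy as np

n = 15
rng = np.random.default_rng(0)
x = np.fft.rfft(rng.random(n))        # a half-spectrum, like randvec2rfreq15
target = np.fft.irfft(x, n)
# rebuild the full conjugate-symmetric spectrum (odd n: no nyquist bin)
spectrum = np.concatenate([x, x[-1:0:-1].conj()])
# a plain inverse-DFT synthesis matrix, with numpy's 1/n convention
full = np.exp(2j * np.pi * np.outer(np.fft.fftfreq(n), np.arange(n))) / n
assert np.allclose(spectrum @ full, target)
```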

(Pdb) list
134             complex_freqs = np.concatenate([
135                 freqs[:tail_idx],
136                 -freqs[tail_idx:],
137                 -freqs[head_idx:tail_idx][::-1]
138             ])
139  ->         complex_mat = complex_wavelet(wavelet, np.outer(complex_freqs, offsets))
140
141             # assuming the input's real and complex components are broken out into separate elements
142
143             # real_in, imag_in
144             # multiplied by conjugate pair and summed
(Pdb) p is_complex, has_dc_offset, has_nyquist
(False, True, False)
(Pdb) p complex_freqs
array([ 0.        ,  0.06666667,  0.13333333,  0.2       ,  0.26666667,
        0.33333333,  0.4       ,  0.46666667, -0.46666667, -0.4       ,
       -0.33333333, -0.26666667, -0.2       , -0.13333333, -0.06666667])

Something is different before the conversion to real:
(Pdb) time_data = np.random.random(15)
(Pdb) complex_freq_data = np.fft.fft(time_data)
(Pdb) p np.fft.ifft(complex_freq_data).round(3)
array([0.02 -0.j, 0.833-0.j, 0.778-0.j, 0.87 +0.j, 0.979+0.j, 0.799+0.j,
       0.461-0.j, 0.781+0.j, 0.118-0.j, 0.64 -0.j, 0.143+0.j, 0.945-0.j,
       0.522+0.j, 0.415-0.j, 0.265+0.j])
(Pdb) p (complex_freq_data @ complex_mat / freq_count).round(3)
array([0.022-0.j, 0.892-0.j, 0.834-0.j, 0.932-0.j, 1.049-0.j, 0.856+0.j,
       0.494-0.j, 0.836-0.j, 0.127-0.j, 0.686-0.j, 0.154+0.j, 1.012-0.j,
       0.559-0.j, 0.444-0.j, 0.283+0.j])

It's strange to encounter this. I wonder what obscure error I have
made for myself.
The frequencies used to make the matrix are the same:
(Pdb) p complex_freqs
array([ 0.        ,  0.06666667,  0.13333333,  0.2       ,  0.26666667,
        0.33333333,  0.4       ,  0.46666667, -0.46666667, -0.4       ,
       -0.33333333, -0.26666667, -0.2       , -0.13333333, -0.06666667])
(Pdb) p np.fft.fftfreq(len(complex_freq_data))
array([ 0.        ,  0.06666667,  0.13333333,  0.2       ,  0.26666667,
        0.33333333,  0.4       ,  0.46666667, -0.46666667, -0.4       ,
       -0.33333333, -0.26666667, -0.2       , -0.13333333, -0.06666667])

Here's the line it's made with, again:
139             complex_mat = complex_wavelet(wavelet, np.outer(complex_freqs, offsets))

I guess it's back down to comparing a single sample or whatnot. I'll
compare sample 6 again.

0634
(Pdb) p (complex_freq_data @ complex_mat[:,6]) / np.fft.ifft(complex_freq_data)[6]
(15.000000000000009-1.8056631630774395e-15j)
(Pdb) p freq_count
14

Looks like I'm dividing by the wrong denominator.

0636
here's how freq_count is originally made:
117         freq_count = len(freqs)
118         if not has_dc_offset:
119             freq_count += 1
120         if not is_complex:
121             freq_count = (freq_count - 1) * 2

(Pdb) p len(freqs)
8
(Pdb) p has_dc_offset
True
(Pdb) p is_complex
False
(Pdb) p (8 - 1) * 2
14

I'm kind of running into the same situation numpy had, but I've
already established a workaround of checking the frequency content to
handle it.

Where did these freqs come from?
269         rfreqs15t = fftfreq(repetition_samples=15, complex=False)

They were generated as real-domain frequencies for 15 samples.

(Pdb) p np.fft.rfftfreq(15)
array([0.        , 0.06666667, 0.13333333, 0.2       , 0.26666667,
       0.33333333, 0.4       , 0.46666667])
(Pdb) p len(np.fft.rfftfreq(15))
8

Looking at their values, one can tell that there is a DC offset, but there is no nyquist frequency. So the total number would be 7*2 (conjugates) + 1 (dc offset) = 15. If there were a nyquist, it would be + another 1.
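that counting can be double-checked directly against numpy:

```python
import numpy as np

# odd n: 1 DC bin + (n-1)//2 conjugate-pair representatives, no nyquist
assert len(np.fft.rfftfreq(15)) == 8
assert 7 * 2 + 1 == 15
# even n: 1 DC bin + (n-2)//2 conjugate pairs + 1 nyquist bin
assert len(np.fft.rfftfreq(16)) == 9
assert 7 * 2 + 1 + 1 == 16
```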

Here:
118         if not has_dc_offset:
119             freq_count += 1
120         if not is_complex:
121             freq_count = (freq_count - 1) * 2
It appears to me to be incorrect to increment freq_count before doubling it, since whether there is a nyquist frequency or not goes unaddressed. The doubling in general assumes both a nyquist and a DC offset, but with an odd number of frequencies there is never a nyquist, even in the complex domain.
I think I just copied that (n - 1) * 2 expression from the numpy docs while handling different kinds of confusion.

0642
0643
no, my analysis above is wrong.

118         if not has_dc_offset:
119             freq_count += 1
here, it has a DC offset, so this code isn't hit. otherwise it would
be adjusting for the below to work
120         if not is_complex:
121             freq_count = (freq_count - 1) * 2
here, it ignores the DC offset, and doubles the remainder.

ok umm let me look at it again
8 - 1 = 7. 7 * 2 = 14.
then it would need to add 1 to bring back the DC offset, which it isn't doing.
what if there's a nyquist?
8 (nyquist + dc + 6 conjugates) - 1 = 7. 7 * 2 = 14
(6*2 + 2 = 14)
it's correct if there's a nyquist.
if there is not a nyquist, it should be one more.
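that relation, written out as a hypothetical helper (the name is mine, not from the code) and checked against numpy's bin counts:

```python
import numpy as np

def real_bins_to_sample_count(freq_count, has_nyquist):
    # hypothetical helper: invert len(np.fft.rfftfreq(n)) back to n,
    # assuming a DC bin is present
    return (freq_count - 1) * 2 + (0 if has_nyquist else 1)

# even n has a nyquist bin, odd n does not
assert real_bins_to_sample_count(len(np.fft.rfftfreq(16)), has_nyquist=True) == 16
assert real_bins_to_sample_count(len(np.fft.rfftfreq(15)), has_nyquist=False) == 15
```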

I did this, but I'm not confident it's the right solution; I don't fully understand it. still, it seemed like the immediate bandaid:
    if not is_complex:
        has_nyquist = bool(freqs[-1] == 0.5)
        freq_count = (freq_count - 1) * 2 + has_nyquist

0648
ok the code is now crashing with data that has a nyquist

ok i had inverted my condition. it took me some exploring in pdb to
identify this. 0651.

    if not is_complex:
        has_nyquist = bool(freqs[-1] == 0.5)
        freq_count = (freq_count - 1) * 2 + (not has_nyquist)

That assertion is now passing. 0652.

0654 the new code is not making the right matrix for the longvec
situation where there are way more samples than frequencies. i feel
much more confident engaging this after the previous work, but i
wanted to note the frequencies.

(Pdb) p longspace_freqs
array([0.        , 0.00270655, 0.0054131 , 0.00811965, 0.0108262 ,
       0.01353275, 0.0162393 , 0.01894585, 0.0216524 ])
There is a DC offset, no immediately visible nyquist, and the
frequencies are real domain.
It's notable here, that since the upper frequency has changed, the
presence of 0.5 in the middle bin no longer indicates whether there
was an odd or even number of original frequencies :/ . Checking 0.5
was a quick hack to keep using the same code layout, but it was a
quick hack I was hoping worked.

It's notable that in this situation, the repetition_samples (output
sample count of the frequency matrix) is actually a floating point
value. So I am likely using freq_count in a way that isn't appropriate
here. I probably mean to use time_count .

0658
ValueError: cannot reshape array of size 28800 into shape (17,18)
Uncaught exception. Entering post mortem debugging
Running 'cont' or 'step' will restart the program
> /shared/src/scratch/fourier.py(167)create_freq2time()
-> ], axis=-1).reshape(len(complex_freqs), len(freqs)*2).T
(Pdb) list
162             interim_mat = complex_mat.copy()
163             interim_mat[head_idx:tail_idx] += interim_mat[len(freqs):][::-1].conj()
164             mat = np.stack([
165                 interim_mat[:len(freqs)].real.T,
166                 -interim_mat[:len(freqs)].imag.T
167  ->         ], axis=-1).reshape(len(complex_freqs), len(freqs)*2).T

The output array has frequencies as columns (first dimension) and
samples as rows (second dimension). It looks like I'm using
len(complex_freqs) where I should be using time_count .

note:
- check for other (n-1)*2 uses and fix them
- check for uses of is_nyquist and consider them for fractional repetition_samples or manual time_count

looks like that was the only (n-1)*2
i changed the matrix shape to use time_count

regarding is_nyquist, this will mean freq_count is wrong for subsignal
matrices with a nyquist. do they happen? when do they happen? i think
nyquists happen when there is an even number of complex frequency
bins?

    if freq_count % 2 == 0:
        if complex:
            neg_freqs = np.linspace(-max_freq, -min_freq, num=freq_count // 2, endpoint=True)
            pos_freqs = -neg_freqs[:0:-1]
        else:
            pos_freqs = np.linspace(min_freq, max_freq, num=freq_count // 2, endpoint=True)
            neg_freqs = pos_freqs[:0]
    else:
        pos_freqs = np.linspace(min_freq, max_freq, num=freq_count // 2, endpoint=True)
        neg_freqs = -pos_freqs[::-1] if complex else pos_freqs[:0]

Yes. When an even number of frequency bins was originally specified,
then a nyquist is included. Its value is max_freq, and it has no
complex conjugate.
Compare to when an odd number of frequency bins is originally
specified; now there is no nyquist and max_freq has a complex
conjugate.

When only real-domain frequencies are output, they are all linearly
spaced aside from the DC offset. So it looks like the information on
whether there is a nyquist or not is not within the list of
real-domain frequencies. [a fudgy way to include this data would be to
negate the nyquist, like it is in the complex frequencies]
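for comparison, numpy's complex frequency list does carry this information, via the sign of the nyquist bin:

```python
import numpy as np

# even bin count: the nyquist bin appears, stored with a negative sign
assert np.fft.fftfreq(8)[4] == -0.5
# odd bin count: no bin reaches magnitude 0.5
assert not np.any(np.abs(np.fft.fftfreq(9)) == 0.5)
```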

an argument for negating the nyquist would be that a user generally doesn't want less data from the same storage. given it is quite reasonable to take the complex conjugate of the nyquist when it is not actually the nyquist frequency but simply a middle frequency of something else, the user may want to assume the larger number of frequency bins underlying the data, to extract more information from it in more detail. does this make a difference? ... i think it means that 1 more sample is needed to process a single repetition. ... i'm not certain at this time what it means beyond that.

i guess for now i'll issue a warning or raise an exception in that case.

        if not complex:
            raise NotImplementedError("the time2freq functions do not presently retain the information that an odd freq_count is associated with real-domain data")

i raised the exception in this case because even freq_counts are more normative.
so, given the freq_count is even, there is indeed a frequency with no complex conjugate (i've been calling that the nyquist) ... so i can just set that value to True

0734 it looks like my code here:
        if not complex:
            freq_count = int((freq_count - 1) * 2)
actually usually prevents there from being an odd freq_count in the real domain

0738
trying to comprehend that there is a case where an odd freq_count happens.
when repetition_samples=15 and complex=False are passed to fftfreq, it basically ends up calculating freq_count = repetition_samples.
this makes for an odd freq_count and real-domain data.
it's kind of an obscure way for the test to fail; it passes 15 to the numpy functions too.
uhhh i'll just bring back the odd_freq_count bool parameter for now
0740

0744
ok, now normal assertions are failing again. also i added a fun
warning for when odd+noncomplex happens.

299  ->     assert np.allclose(randvec2rfreq152randvec, randvec2rfreq152irfft)

293         randvec2rfreq152randvec = randvec2rfreq15.view(float) @ irft_to_15
294         randvec2rfreq152irfft = np.fft.irfft(randvec2rfreq15, 15)

287         rft_from_15 = create_time2freq(15, freqs=rfreqs15t, odd_freq_count=True)
288         irft_to_15 = create_freq2time(15, freqs=rfreqs15t)

oops, gotta pass that parameter i made for the second use too.

0748 all the basic asserts are passing again, and additionally the
longvec matrices are created without crashing (the associated asserts
are still failing, and i've changed things they use so that's roughly
expected)

guess i'll attach my mess and poke at the real world a little
# AGPL-3 Karl Semich 2022
import numpy as np

# At the moment, wavelets must be:
# - Zero at zero f(0) == 0
# - Even functions f(x) == f(-x)
# - Odd functions when slid by 90 degrees f(0.25 - x) == f(0.25 + x)
# Presently x is passed in the modular range [0,1) but this is arbitrary, just remove it if helpful
COSINE = lambda x: np.cos(2 * np.pi * x)
STEP = lambda x: COSINE(x).round()

def complex_wavelet(wavelet, x):
    return wavelet(x % 1) + wavelet((x - 0.25) % 1) * 1j

def fftfreq(freq_count = None, sample_rate = None,
            min_freq = None, max_freq = None,
            dc_offset = True, complex = True, sample_time = None,
            repetition_time = None, repetition_samples = None,
            freq_sample_rate = None, freq_sample_time = None):
    '''
    Calculates and returns the frequency bins used to convert between the time
    and frequency domains with a discrete Fourier transform.

    With no optional arguments, this function should be equivalent to numpy.fft.fftfreq .

    To specify frequencies in terms of repetitions / sample_count, set
    sample_rate to sample_count. If frequencies were in Hz (cycles/sec), this
    implies that sample_count should be considered to last 1 second, so the
    frequency becomes equal to the cycle count.

    Parameters:
        - freq_count: the number of frequency bins to generate
        - sample_rate: the time-domain sample rate (default: 1 sample/time_unit)
        - min_freq: the minimum frequency the signal contains
        - max_freq: the maximum frequency the signal contains
        - dc_offset: whether to include a DC offset component (0 Hz)
        - complex: whether to generate negative frequencies
        - sample_time: sample_rate as the duration of a single sample
        - repetition_time: min_freq as a period time, possibly of a subsignal
        - repetition_samples: min_freq as a period size in samples
        - freq_sample_rate: convert to or from a different frequency-domain sample rate
        - freq_sample_time: freq_sample_rate as the duration of a single sample

    Returns a vector of sinusoid time scalings that can be used to perform or
    analyse a discrete Fourier transform.  Multiply this vector by the time-domain
    sample rate to get the frequencies.
    '''
    assert not sample_time or not sample_rate
    assert not freq_sample_time or not freq_sample_rate
    assert (min_freq, repetition_time, repetition_samples).count(None) >= 2
    assert freq_count or min_freq or repetition_time or repetition_samples
    if sample_time is not None:
        sample_rate = 1 / sample_time
    if freq_sample_time is not None:
        freq_sample_rate = 1 / freq_sample_time
    sample_rate = sample_rate or freq_sample_rate or 1
    freq_sample_rate = freq_sample_rate or sample_rate or 1
    if repetition_time:
        min_freq = 1 / repetition_time
    elif repetition_samples:
        min_freq = freq_sample_rate / repetition_samples
    # here freq_count adopts the complex + DC offset value, for calculations
    # it might be clearer to use e.g. sample_count, at least consolidate the cases with lower
    if freq_count is None:
        freq_count = int(np.ceil(freq_sample_rate / min_freq))
        #if (freq_count % 2) == 1 and not complex:
        #    freq_count += 1
    else:
        if not dc_offset:
            freq_count += 1
        if not complex:
            freq_count = int((freq_count - 1) * 2)
        if not min_freq:
            min_freq = freq_sample_rate / freq_count
    if not max_freq:
        #max_freq = freq_sample_rate / 2
        max_freq = freq_count * min_freq / 2
        if freq_count % 2 != 0:
            #max_freq -= freq_sample_rate / (2 * freq_count)
            max_freq -= min_freq / 2
    min_freq /= sample_rate
    max_freq /= sample_rate
    if freq_count % 2 == 0:
        if complex:
            neg_freqs = np.linspace(-max_freq, -min_freq, num=freq_count // 2, endpoint=True)
            pos_freqs = -neg_freqs[:0:-1]
        else:
            pos_freqs = np.linspace(min_freq, max_freq, num=freq_count // 2, endpoint=True)
            neg_freqs = pos_freqs[:0]
    else:
        pos_freqs = np.linspace(min_freq, max_freq, num=freq_count // 2, endpoint=True)
        neg_freqs = -pos_freqs[::-1] if complex else pos_freqs[:0]
        if not complex:
            import warnings
            warnings.warn('An odd-valued complex frequency count was generated for real-valued frequencies. Be sure to pass odd_repetition_samples=True when using the frequencies.', stacklevel=2)
        #assert complex # the doubling of freq_count should prevent odd freq counts
        # if there were a real-domain odd freq count then the matrix functions would need this information
    return np.concatenate([
        np.array([0] if dc_offset else []),
        pos_freqs,
        neg_freqs
    ])

def create_freq2time(time_count = None, freqs = None, wavelet = COSINE, odd_repetition_samples = None):
    '''
    Creates a matrix that will perform an inverse discrete Fourier transform
    when it post-multiplies a vector of complex frequency magnitudes.

    Example
        time_data = spectrum @ create_freq2time(len(spectrum))

    Parameters:
        - time_count: size of the output vector, defaults to the frequency bincount
        - freqs: frequency bins to convert, defaults to traditional FFT frequencies

    Returns:
        - an inverse discrete Fourier matrix of shape (len(freqs), time_count)
    '''
    assert (time_count is not None) or (freqs is not None)
    if freqs is None:
        assert odd_repetition_samples is None
        odd_repetition_samples = (time_count % 2 == 1)
        freqs = fftfreq(time_count)
    is_complex = (freqs < 0).any()
    has_dc_offset = bool(freqs[0] == 0)
    freq_count = len(freqs)
    if not has_dc_offset:
        freq_count += 1
    if not is_complex:
        #has_nyquist = bool(freqs[-1] == 0.5)
        #has_nyquist = True # because when max_freq is changed this information isn't held here
        has_nyquist = (bool(odd_repetition_samples) != has_dc_offset)
        freq_count = (freq_count - 1) * 2 + (not has_nyquist)
    if time_count is None:
        time_count = freq_count
    offsets = np.arange(time_count)
    if is_complex:
        mat = complex_wavelet(wavelet, np.outer(freqs, offsets))
    else:
        head_idx = 1 if has_dc_offset else 0
        tail_idx = len(freqs) - has_nyquist

        # note that a negative frequency produces a negative imaginary part
        # so some things could be simplified here
        complex_freqs = np.concatenate([
            freqs[:tail_idx],
            -freqs[tail_idx:],
            -freqs[head_idx:tail_idx][::-1]
        ])
        complex_mat = complex_wavelet(wavelet, np.outer(complex_freqs, offsets))

        # assuming the input's real and complex components are broken out into separate elements

        # real_in, imag_in
        # multiplied by conjugate pair and summed
        # (real_in_left + imag_in_left j) * (real_in_right + imag_in_right j) +
        #  (real_in_left + imag_in_left j) * (real_in_right - imag_in_right j)
        # (real_in_left + imag_in_left j) * (real_in_right + imag_in_right j + real_in_right - imag_in_right j)
        # (real_in_left + imag_in_left j) * (2 * real_in_right)
        # i must have made a mistake, because the answer is coming out with an imaginary component rather than with phase shift.

        # can consider input of [0,1j,0]
        # real_in_left = real_in_right = 0
        # imag_in_left = 1
        # imag_in_right = 0.25
        # real_out = (real_in_left * real_in_right) - imag_in_left * imag_in_right * 2

        # the nonincluded frequencies are actually negative.
        # so the real output is real_proudct - 2 * imag_product

        # real_out = real_in_left * real_in_right - imag_in_left * imag_in_right
        #import pdb; pdb.set_trace()
        interim_mat = complex_mat.copy()
        interim_mat[head_idx:tail_idx] += interim_mat[len(freqs):][::-1].conj()
        mat = np.stack([
            interim_mat[:len(freqs)].real.T,
            -interim_mat[:len(freqs)].imag.T
        ], axis=-1).reshape(time_count, len(freqs)*2).T
        #mat = mat.reshape(complex_mat.shape[0], len(freqs)*2)
        # for conjugate
        #mat[head_idx*2:tail_idx*2::2] += np.stack([
        #    0, -complex_mat[len(freqs):].imag
        #], axis=-1).reshape(complex_mat.shape[0], (tail_idx-head_idx)*2)

        #mat = complex_wavelet(wavelet, np.outer(np.concatenate([freqs[:tail_idx], -freqs[tail_idx:]]), offsets))

        ## todo: remove test data
        #full_mat = complex_wavelet(wavelet, np.outer(np.concatenate([freqs[:tail_idx], -freqs[tail_idx:], -freqs[head_idx:tail_idx][::-1]]), offsets))
        #time_data = np.random.random(full_mat.shape[1])
        #extended_freq_data = time_data @ np.linalg.inv(full_mat)
        #freq_data = extended_freq_data[:len(freqs)]
        #import pdb; pdb.set_trace()

        #mat[head_idx:tail_idx,:] += complex_wavelet(wavelet, np.outer(-freqs[head_idx:tail_idx], offsets))
        #mat = mat.real
    return mat / freq_count # scaled to match numpy convention

def create_time2freq(time_count = None, freqs = None, wavelet = COSINE, odd_repetition_samples = None):
    '''
    Creates a matrix that will perform a forward discrete Fourier transform
    when it post-multiplies a vector of time series data.

    If time_count is too small or large, the minimal least squares solution
    over all the data passed will be produced.

    This function is equivalent to calling .pinv() on the return value of
    create_freq2time. If the return value is single-use, it is more efficient and
    accurate to use numpy.linalg.lstsq .

    Example
        spectrum = time_data @ create_time2freq(len(time_data))

    Parameters:
        - time_count: size of the input vector, defaults to the frequency bincount
        - freqs: frequency bins to produce, defaults to traditional FFT frequencies

    Returns:
        - a discrete Fourier matrix of shape (time_count, len(freqs))
    '''
    assert (time_count is not None) or (freqs is not None)
    if freqs is None:
        assert odd_repetition_samples is None
        odd_repetition_samples = (time_count % 2 == 1)
        freqs = fftfreq(time_count)
    is_complex = (freqs < 0).any()
    # TODO: provide for DC offsets and nyquists not at index 0 by excluding items in (0,0.5)
    has_dc_offset = bool(freqs[0] == 0)
    if not is_complex:
        #has_nyquist = bool(freqs[-1] == 0.5)
        #has_nyquist = True
        has_nyquist = (bool(odd_repetition_samples) != has_dc_offset)
        head_idx = 1 if has_dc_offset else 0
        tail_idx = len(freqs)-1 if has_nyquist else len(freqs)
        neg_start_idx = len(freqs)
        freqs = np.concatenate([
            freqs[:head_idx],
            freqs[head_idx:tail_idx],
            -freqs[tail_idx:neg_start_idx],
            -freqs[head_idx:tail_idx][::-1]
        ])
    inverse_mat = create_freq2time(time_count, freqs, wavelet, odd_repetition_samples = odd_repetition_samples)
    forward_mat = np.linalg.pinv(inverse_mat)
    if not is_complex:
        forward_mat = np.concatenate([
            forward_mat[:,:tail_idx],
            forward_mat[:,tail_idx:neg_start_idx].conj()
        ], axis=1)
    return forward_mat

def peak_pair_idcs(freq_data, dc_offset=True, sum=True):
    dc_offset = int(dc_offset)
    freq_heights = abs(freq_data) # squares and sums the components
    if sum:
        while len(freq_heights.shape) > 1:
            freq_heights = freq_heights.sum(axis=0)
    paired_heights = freq_heights[...,dc_offset:-1] + freq_heights[...,dc_offset+1:]
    peak_idx = paired_heights.argmax(axis=-1, keepdims=True) + dc_offset
    return np.concatenate([peak_idx, peak_idx + 1], axis=-1)

def improve_peak(time_data, min_freq, max_freq):
    freqs = fftfreq(time_data.shape[-1], min_freq = min_freq, max_freq = max_freq)
    freq_data = time_data @ np.linalg.inv(create_freq2time(freqs=freqs))
    left, right = peak_pair_idcs(freq_data)
    return freq_data[left], freq_data[right]
    

def test():
    np.random.seed(0)

    randvec = np.random.random(16)
    freqs16 = fftfreq(16)
    ift16 = create_freq2time(16, freqs=freqs16)
    ft16 = create_time2freq(16)
    randvec2time = randvec @ ift16
    randvec2freq = randvec @ ft16
    randvec2ifft = np.fft.ifft(randvec)
    randvec2fft = np.fft.fft(randvec)
    assert np.allclose(freqs16, np.fft.fftfreq(16))
    assert np.allclose(randvec2ifft, randvec2time)
    assert np.allclose(randvec2fft, randvec2freq)
    assert np.allclose(randvec2ifft, np.linalg.solve(ft16.T, randvec))
    assert np.allclose(randvec2fft, np.linalg.solve(ift16.T, randvec))
    #randvec15f = randvec[:15] @ create_time2freq(15)
    rfreqs15t = fftfreq(repetition_samples=15, complex=False)
    rfreqs15f = fftfreq(15, complex=False)
    irft_from_15 = create_freq2time(freqs=rfreqs15f)
    rft_from_15 = create_time2freq(15, freqs=rfreqs15t, odd_repetition_samples=True)
    irft_to_15 = create_freq2time(15, freqs=rfreqs15t, odd_repetition_samples=True)
    randvec2rtime15 = randvec[:15].astype(np.complex128).view(float) @ irft_from_15
    randvec2rfreq15 = randvec[:15] @ rft_from_15
    randvec2irfft = np.fft.irfft(randvec[:15])
    randvec2rfft = np.fft.rfft(randvec[:15])
    randvec2rfreq152randvec = randvec2rfreq15.view(float) @ irft_to_15
    randvec2rfreq152irfft = np.fft.irfft(randvec2rfreq15, 15)
    assert np.allclose(rfreqs15t, np.fft.rfftfreq(15))
    assert np.allclose(rfreqs15f, np.fft.rfftfreq(28))
    assert np.allclose(randvec2rtime15, randvec2irfft)
    assert np.allclose(randvec2rfreq15, randvec2rfft)
    assert np.allclose(randvec2rfreq152randvec, randvec2rfreq152irfft)
    assert np.allclose(randvec2rfreq152randvec, randvec[:15])
    rfreqs16t = fftfreq(repetition_samples=16, complex=False)
    rfreqs16f = fftfreq(16, complex=False)
    irft16 = create_freq2time(freqs=rfreqs16f)
    rft16 = create_time2freq(16, freqs=rfreqs16t)
    randvec2rtime16 = randvec[:16].astype(np.complex128).view(float) @ irft16
    randvec2rfreq16 = randvec[:16] @ rft16
    randvec2irfft = np.fft.irfft(randvec[:16])
    randvec2rfft = np.fft.rfft(randvec[:16])
    assert np.allclose(rfreqs16t, np.fft.rfftfreq(16))
    assert np.allclose(rfreqs16f, np.fft.rfftfreq(30))
    assert np.allclose(randvec2rtime16, randvec2irfft)
    assert np.allclose(randvec2rfreq16, randvec2rfft)

    # [ 0, 1, 2, 3, 4, 3, 2, 1]
    #irft9_16 = create_freq2time(16, fftfreq(9, complex = False))
    #rft16_30 = create_time2freq(16, fftfreq(30, complex = False))
    #randvec2
    #assert np.allclose((randvec @ rft16) @ irft16, randvec)
    
    
    # sample data at a differing rate
    time_rate = np.random.random() * 2
    freq_rate = 1.0
    freqs = np.fft.fftfreq(len(randvec))
    rescaling_freqs = fftfreq(len(randvec), freq_sample_rate = freq_rate, sample_rate = time_rate)
    rescaling_ift = create_freq2time(freqs = rescaling_freqs)
    rescaling_ft = create_time2freq(freqs = rescaling_freqs)
    rescaled_time_data = np.array([
        np.mean([
            randvec[freqidx] * np.exp(2j * np.pi * freqs[freqidx] * sampleidx / time_rate)
            for freqidx in range(len(randvec))
        ])
        for sampleidx in range(len(randvec))
    ])
    assert np.allclose(rescaled_time_data, randvec @ rescaling_ift)
    assert np.allclose(rescaled_time_data, np.linalg.solve(rescaling_ft.T, randvec))
    unscaled_freq_data = rescaled_time_data @ rescaling_ft
    unscaled_time_data = unscaled_freq_data @ ift16
    assert np.allclose(unscaled_freq_data, randvec)
    assert np.allclose(unscaled_time_data, randvec2time)
    assert np.allclose(np.linalg.solve(rescaling_ift.T, rescaled_time_data), randvec)

    # extract a repeating wave
    longvec = np.empty(len(randvec)*100)
    short_count = np.random.random() * 4 + 1
    short_duration = len(longvec) / short_count
    for idx in range(len(longvec)):
        longvec[idx] = randvec[int((idx / len(longvec) * short_duration) % len(randvec))]
    shortspace_freqs = fftfreq(None, complex = False, dc_offset = True, repetition_samples = len(randvec))
    longspace_freqs = fftfreq(len(shortspace_freqs), complex = False, dc_offset = True, repetition_samples = short_duration)
    assert np.allclose(longspace_freqs, shortspace_freqs / (short_duration / len(randvec)))
    inserting_ift = create_freq2time(len(longvec), longspace_freqs, wavelet = STEP)
    extracting_ft = create_time2freq(len(longvec), longspace_freqs, wavelet = STEP)
    extracting_ift = create_freq2time(freqs = shortspace_freqs, wavelet = STEP)
    unextracting_ft = create_time2freq(freqs = shortspace_freqs, wavelet = STEP)
    inserting_spectrum = randvec @ unextracting_ft
    assert np.allclose(inserting_spectrum @ extracting_ift, randvec)
    assert np.allclose(longvec, inserting_spectrum @ inserting_ift)
    extracted_freqs = longvec @ extracting_ft
    extracted_randvec = extracted_freqs @ extracting_ift
    assert np.allclose(extracted_randvec, randvec)

if __name__ == '__main__':
    test()