@@ -59,7 +59,7 @@ def _multi_mass(Q, T, m, M_T, Σ_T):
5959 return D
6060
6161
62- def _get_first_mstump_profile (start , T , m , excl_zone , M_T , Σ_T ):
62+ def _get_first_mstump_profile (start , T_A , T_B , m , excl_zone , M_T , Σ_T ):
6363 """
6464 Multi-dimensional wrapper to compute the multi-dimensional matrix profile
6565 and multi-dimensional matrix profile index for a given window within the
@@ -72,10 +72,13 @@ def _get_first_mstump_profile(start, T, m, excl_zone, M_T, Σ_T):
7272 The window index to calculate the first matrix profile, matrix profile
7373 index, left matrix profile index, and right matrix profile index for.
7474
75- T : ndarray
75+ T_A : ndarray
7676 The time series or sequence for which the matrix profile index will
7777 be returned
7878
79+ T_B : ndarray
80+ The time series or sequence that contains your query subsequences
81+
7982 m : int
8083 Window size
8184
@@ -98,8 +101,8 @@ def _get_first_mstump_profile(start, T, m, excl_zone, M_T, Σ_T):
98101 equal to `start`
99102 """
100103
101- d , n = T .shape
102- D = _multi_mass (T [:, start : start + m ], T , m , M_T , Σ_T )
104+ d , n = T_A .shape
105+ D = _multi_mass (T_B [:, start : start + m ], T_A , m , M_T , Σ_T )
103106
104107 zone_start = max (0 , start - excl_zone )
105108 zone_stop = min (n - m + 1 , start + excl_zone )
@@ -159,22 +162,7 @@ def _get_multi_QT(start, T, m):
159162
160163@njit (parallel = True , fastmath = True )
161164def _mstump (
162- T ,
163- m ,
164- P ,
165- I ,
166- D ,
167- D_prime ,
168- range_stop ,
169- excl_zone ,
170- M_T ,
171- Σ_T ,
172- QT ,
173- QT_first ,
174- μ_Q ,
175- σ_Q ,
176- k ,
177- range_start = 1 ,
165+ T , m , range_stop , excl_zone , M_T , Σ_T , QT , QT_first , μ_Q , σ_Q , k , range_start = 1
178166):
179167 """
180168 A Numba JIT-compiled version of mSTOMP, a variant of mSTAMP, for parallel
@@ -190,18 +178,6 @@ def _mstump(
190178 m : int
191179 Window size
192180
193- P : ndarray
194- The output multi-dimensional matrix profile
195-
196- I : ndarray
197- The output multi-dimensional matrix profile index
198-
199- D : ndarray
200- Storage for the distance profile
201-
202- D_prime : ndarray
203- Storage for the cumulative sum of the distance profile
204-
205181 range_stop : int
206182 The index value along T for which to stop the matrix profile
207183 calculation. This parameter is here for consistency with the
@@ -260,8 +236,12 @@ def _mstump(
260236 QT_even = QT .copy ()
261237 d = T .shape [0 ]
262238
239+ P = np .empty ((d , range_stop - range_start ))
240+ I = np .empty ((d , range_stop - range_start ))
241+ D = np .empty ((d , k ))
242+ D_prime = np .empty (k )
243+
263244 for idx in range (range_start , range_stop ):
264- D [:, :] = 0.0
265245 for i in range (d ):
266246 # Numba's prange requires incrementing a range by 1 so replace
267247 # `for j in range(k-1,0,-1)` with its incrementing complement
@@ -311,10 +291,11 @@ def _mstump(
311291 D_prime = D_prime + D [i ]
312292
313293 min_index = np .argmin (D_prime )
314- I [i , idx ] = min_index
315- P [i , idx ] = D_prime [min_index ] / (i + 1 )
316- if np .isinf (P [i , idx ]): # pragma nocover
317- I [i , idx ] = - 1
294+ pos = idx - range_start
295+ I [i , pos ] = min_index
296+ P [i , pos ] = D_prime [min_index ] / (i + 1 )
297+ if np .isinf (P [i , pos ]): # pragma nocover
298+ I [i , pos ] = - 1
318299
319300 return P , I
320301
@@ -359,55 +340,45 @@ def mstump(T, m):
359340 See mSTAMP Algorithm
360341 """
361342
362- T = np .asarray (core .transpose_dataframe (T ))
343+ T_A = np .asarray (core .transpose_dataframe (T )).copy ()
344+ T_B = T_A .copy ()
345+
346+ T_A [np .isinf (T_A )] = np .nan
347+ T_B [np .isinf (T_B )] = np .nan
363348
364- core .check_dtype (T )
365- core .check_nan (T )
366- if T .ndim <= 1 : # pragma: no cover
367- err = f"T is { T .ndim } -dimensional and must be greater than 1-dimensional"
349+ core .check_dtype (T_A )
350+ if T_A .ndim <= 1 : # pragma: no cover
351+ err = f"T is { T_A .ndim } -dimensional and must be at least 1-dimensional"
368352 raise ValueError (f"{ err } " )
369353
370354 core .check_window_size (m )
371355
372- d = T .shape [0 ]
373- n = T .shape [1 ]
356+ d = T_A .shape [0 ]
357+ n = T_A .shape [1 ]
374358 k = n - m + 1
375359 excl_zone = int (np .ceil (m / 4 )) # See Definition 3 and Figure 3
376360
377- M_T , Σ_T = core .compute_mean_std (T , m )
378- μ_Q , σ_Q = core .compute_mean_std (T , m )
361+ M_T , Σ_T = core .compute_mean_std (T_A , m )
362+ μ_Q , σ_Q = core .compute_mean_std (T_B , m )
363+
364+ T_A [np .isnan (T_A )] = 0
379365
380- P = np .full ((d , k ), np .inf , dtype = "float64" )
381- D = np .zeros ((d , k ), dtype = "float64" )
382- D_prime = np .zeros (k , dtype = "float64" )
383- I = np .ones ((d , k ), dtype = "int64" ) * - 1
366+ P = np .empty ((d , k ), dtype = "float64" )
367+ I = np .empty ((d , k ), dtype = "int64" )
384368
385369 start = 0
386370 stop = k
387371
388372 P [:, start ], I [:, start ] = _get_first_mstump_profile (
389- start , T , m , excl_zone , M_T , Σ_T
373+ start , T_A , T_B , m , excl_zone , M_T , Σ_T
390374 )
391375
392- QT , QT_first = _get_multi_QT (start , T , m )
393-
394- _mstump (
395- T ,
396- m ,
397- P ,
398- I ,
399- D ,
400- D_prime ,
401- stop ,
402- excl_zone ,
403- M_T ,
404- Σ_T ,
405- QT ,
406- QT_first ,
407- μ_Q ,
408- σ_Q ,
409- k ,
410- start + 1 ,
376+ T_B [np .isnan (T_B )] = 0
377+
378+ QT , QT_first = _get_multi_QT (start , T_A , m )
379+
380+ P [:, start + 1 : stop ], I [:, start + 1 : stop ] = _mstump (
381+ T_A , m , stop , excl_zone , M_T , Σ_T , QT , QT_first , μ_Q , σ_Q , k , start + 1
411382 )
412383
413384 return P .T , I .T
0 commit comments