AutoML-org
/

TempoPFN

Time Series Forecasting

foundation models

pretrained models

Model card Files Files and versions

TempoPFN / src /data /filter.py

Vladyslav Moroshan

Apply ruff formatting

0a58567 5 months ago

history blame contribute delete

2.24 kB

	import numpy as np
	import torch
	from scipy import signal
	from statsmodels.tsa.stattools import acf


	def lempel_ziv_complexity(binary_sequence: np.ndarray) -> int:
	    """
	    Count the phrases produced by a greedy Lempel-Ziv style parse.

	    The sequence is scanned left to right. Starting at the current position,
	    symbols are appended (as their ``str()`` form) until the accumulated
	    phrase is one that has not been recorded yet; that phrase is recorded,
	    counted, and the scan resumes right after it. If every phrase that can be
	    formed from the current position up to the end of the sequence is already
	    known, the start position moves forward by a single symbol and the parse
	    is retried from there.

	    Args:
	        binary_sequence: Sequence of symbols to parse.

	    Returns:
	        Number of distinct phrases recorded (0 for an empty sequence).
	    """
	    # Stringify each symbol once up front; phrases are concatenations of these.
	    tokens = [str(symbol) for symbol in binary_sequence]
	    total = len(tokens)

	    seen_phrases = set()
	    phrase_count = 0
	    start = 0
	    while start < total:
	        # Fallback used when no unseen phrase begins at `start`.
	        next_start = start + 1
	        phrase = ""
	        for end in range(start, total):
	            phrase += tokens[end]
	            if phrase in seen_phrases:
	                continue
	            seen_phrases.add(phrase)
	            phrase_count += 1
	            next_start = end + 1
	            break
	        start = next_start
	    return phrase_count


	def is_low_quality(
	    series: torch.Tensor,
	    autocorr_threshold: float = 0.2,
	    snr_threshold: float = 0.5,
	    complexity_threshold: float = 0.4,
	) -> bool:
	    """
	    Return True if the series appears non-forecastable (noise-like).

	    A series is rejected outright when, after squeezing, it has fewer than 20
	    values, contains NaN/Inf, or has variance below 1e-10. Otherwise it is
	    flagged only when all three heuristics agree:
	    - low SNR proxy: variance of a trailing moving average of the raw series
	      divided by the variance of the residual is below ``snr_threshold``
	    - high normalized Lempel-Ziv complexity: phrase count of the detrended
	      series binarized at its median, divided by its length, exceeds
	      ``complexity_threshold``
	    - weak autocorrelation: the largest absolute ACF value of the linearly
	      detrended series over lags 1..min(n // 4, 40) is below
	      ``autocorr_threshold``

	    The heuristics are evaluated cheapest-first and the function returns
	    False as soon as one of them disagrees.

	    Args:
	        series: Tensor holding a single series; singleton dimensions are
	            squeezed away before analysis.
	        autocorr_threshold: Upper bound on max |ACF| for "uncorrelated".
	        snr_threshold: Upper bound on the SNR proxy for "noisy".
	        complexity_threshold: Lower bound on normalized complexity for
	            "random-like".

	    Returns:
	        True if the series should be treated as low quality, else False.

	    Raises:
	        ValueError: If the squeezed series still has more than one dimension
	            (and was not already rejected as too short or constant).
	    """
	    x = series.squeeze().detach().cpu().numpy()
	    if x.size < 20:
	        return True
	    # NaN/Inf would slip past the variance check and make detrending fail.
	    if not np.isfinite(x).all():
	        return True
	    if np.var(x) < 1e-10:
	        return True
	    if x.ndim != 1:
	        raise ValueError(
	            f"is_low_quality expects a single 1-D series after squeeze(), got shape {x.shape}"
	        )

	    # --- SNR proxy (cheapest check, uses the raw series) ---
	    # signal_est[k] is the mean of x[k : k + win_size]; it is compared against
	    # the last sample of each window, i.e. a trailing moving average.
	    win_size = max(3, min(len(x) // 10, 15))
	    signal_est = np.convolve(x, np.ones(win_size) / win_size, mode="valid")
	    noise_est = x[win_size - 1 :] - signal_est
	    var_signal = float(np.var(signal_est))
	    var_noise = float(np.var(noise_est))
	    # With a (near-)zero residual variance the ratio is replaced by 1.0.
	    snr_proxy = var_signal / var_noise if var_noise > 1e-8 else 1.0
	    if not snr_proxy < snr_threshold:
	        return False

	    # --- Normalized Lempel-Ziv complexity of the median-binarized series ---
	    x_detrended = signal.detrend(x)
	    median_val = float(np.median(x_detrended))
	    binary_seq = (x_detrended > median_val).astype(np.uint8)
	    # len(binary_seq) >= 20 here, so the division is always safe.
	    normalized_complexity = lempel_ziv_complexity(binary_seq) / len(binary_seq)
	    if not normalized_complexity > complexity_threshold:
	        return False

	    # --- Autocorrelation strength ---
	    # len(x_detrended) >= 20 guarantees max_lags >= 5.
	    max_lags = min(len(x_detrended) // 4, 40)
	    try:
	        acf_vals = acf(x_detrended, nlags=max_lags, fft=True)[1:]
	        autocorr_strength = float(np.max(np.abs(acf_vals)))
	    except Exception:
	        # Deliberate best-effort: if the ACF cannot be computed, treat the
	        # series as having no autocorrelation rather than failing the filter.
	        autocorr_strength = 0.0

	    return bool(autocorr_strength < autocorr_threshold)