AULA 2: Modelagem SARIMA e Prophet

Insper - PADS

Financial Analytics

Autor: Paloma Vaissman Uribe

Código
# Imports
# Install the forecasting libraries if you haven't already:
# !pip install prophet
# !pip install statsforecast
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_error
from statsforecast import StatsForecast
from statsforecast.arima import arima_string
from statsforecast.models import AutoARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima_process import ArmaProcess

1. Simulando e analisando um processo ARIMA usando statsmodels

Código
np.random.seed(123)

# Simulate the stationary AR(1) part of an ARIMA(1,1,0) process (AR coefficient 0.5).
# ArmaProcess takes lag-polynomial coefficients, i.e. [1, -ar1, -ar2, ...].
ar_params = [1, -0.5]
arma_process = ArmaProcess(ar=ar_params, ma=[1])
diff_y_stationary = arma_process.generate_sample(nsample=500)

# Integrate once (d=1): the cumulative sum of the stationary series is the ARIMA(1,1,0) process 'y'
y = np.cumsum(diff_y_stationary)

# 2x3 grid: top row is the original series, bottom row is its first difference
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
plt.subplots_adjust(hspace=0.3, wspace=0.3)

# Time series plot of the original series.
# Raw string: "\s" is not a valid escape sequence in a normal string literal
# (SyntaxWarning on recent Python versions); the rendered title is unchanged.
axes[0, 0].plot(y)
axes[0, 0].set_title(r'Time Series Plot of $y \sim ARIMA(1,1,0)$')
axes[0, 0].grid(True)

# ACF plot of the original series
plot_acf(y, lags=60, ax=axes[0, 1], title='ACF of Original Series (y)')

# PACF plot of the original series
plot_pacf(y, lags=60, ax=axes[0, 2], title='PACF of Original Series (y)')

# Time series plot of the differenced series
dy = np.diff(y)
axes[1, 0].plot(dy)
axes[1, 0].set_title('Time Series Plot of First Difference (dy)')
axes[1, 0].grid(True)

# ACF plot of the differenced series
plot_acf(dy, lags=60, ax=axes[1, 1], title='ACF of First Difference (dy)')

# PACF plot of the differenced series
plot_pacf(dy, lags=60, ax=axes[1, 2], title='PACF of First Difference (dy)')

plt.show()

2. Reproduzindo o exemplo das etapas de Box-Jenkins usando um processo IMA(1,1)

Código
np.random.seed(123)

# Lag polynomials of the stationary MA(1) part (ArmaProcess expects a leading 1)
ar_params = [1]        # no AR terms
ma_params = [1, 0.9]   # MA(1) coefficient is 0.9

# Simulate the stationary difference series (the MA(1) part)
arma_process = ArmaProcess(ar=ar_params, ma=ma_params)
diff_ima1 = arma_process.generate_sample(nsample=500)

# Integrate once and attach a daily DatetimeIndex to obtain the IMA(1,1) series
dates = pd.date_range(start='2020-01-01', periods=500, freq='D')
ima1 = pd.Series(np.cumsum(diff_ima1), index=dates)

# Quick visual check of the simulated series
# (matplotlib is already imported at the top of the file, so it is not re-imported here)
plt.figure(figsize=(10, 4))
plt.plot(ima1)
plt.title('Simulated IMA(1,1) Time Series')
plt.show()

Código
## 1. Model estimation
# Fit an ARIMA(0,1,1) to the simulated 'ima1' series and print the estimation summary.
# 'model' and 'results' are kept as globals: later cells read 'results'.
model = ARIMA(endog=ima1, order=(0, 1, 1))
results = model.fit()
print(results.summary())
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  500
Model:                 ARIMA(0, 1, 1)   Log Likelihood                -710.907
Date:                Mon, 13 Oct 2025   AIC                           1425.813
Time:                        00:03:01   BIC                           1434.239
Sample:                    01-01-2020   HQIC                          1429.120
                         - 05-14-2021                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ma.L1          0.9090      0.019     48.887      0.000       0.873       0.945
sigma2         1.0080      0.066     15.372      0.000       0.879       1.136
===================================================================================
Ljung-Box (L1) (Q):                   0.22   Jarque-Bera (JB):                 0.24
Prob(Q):                              0.64   Prob(JB):                         0.89
Heteroskedasticity (H):               0.86   Skew:                            -0.01
Prob(H) (two-sided):                  0.33   Kurtosis:                         2.90
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
Código
## 2. Model diagnostic

res = results.resid

# Plot the autocorrelation function (ACF) of the residuals.
# lags=24 matches the horizon of the Ljung-Box test below.
fig, ax = plt.subplots(figsize=(10, 4))
plot_acf(res, lags=24, zero=False, ax=ax, title='ACF of ARIMA(0,1,1) Residuals')
ax.set_xlabel('Lag')
plt.show()

# Ljung-Box portmanteau test (R equivalent: Box.test(res, lag=24, fitdf=1, type="Ljung"))

print("\n--- Ljung-Box Test (Q-Test) Results ---")

# The fitted ARIMA(0,1,1) has a single estimated ARMA coefficient (ma.L1) and no
# constant term, so only one degree of freedom is removed from the Q-statistic:
# df = lags - model_df = 24 - 1 = 23.
lb_test_results_df = acorr_ljungbox(res, lags=[24], model_df=1, return_df=True)
print("\nLjung-Box Test (Lags=24, Model DF=1 (MA1)):")
print(lb_test_results_df)

#


--- Ljung-Box Test (Q-Test) Results ---

Ljung-Box Test (Lags=24, Model DF=2 (MA1 + Constant)):
      lb_stat  lb_pvalue
24  27.504276   0.192697
Código
# Draw the built-in diagnostic plots of the fitted ARIMA(0,1,1) results object
results.plot_diagnostics(figsize=(15, 12))
plt.show()

Código
## 3. Model prediction

# Forecast horizon: 12 steps ahead (h=12)
forecast_results = results.get_forecast(steps=12)

# Point forecasts and the bounds of their confidence interval
mean_forecast = forecast_results.predicted_mean
conf_int = forecast_results.conf_int()
lower_bound = conf_int.iloc[:, 0]
upper_bound = conf_int.iloc[:, 1]

# History, point forecast and shaded interval on a single figure
plt.figure(figsize=(10, 5))
plt.plot(ima1, color='blue', label='Historical Data')
plt.plot(mean_forecast, color='red', linestyle='--', label='Forecast (h=12)')
plt.fill_between(
    mean_forecast.index,
    lower_bound,
    upper_bound,
    alpha=0.5,
    color='pink',
    label='95% Confidence Interval',
)
plt.title('ARIMA(0,1,1) Forecast')
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

Código
# Display the 12-step-ahead point forecasts computed above
mean_forecast
array([-36.69689052, -36.69689052, -36.69689052, -36.69689052,
       -36.69689052, -36.69689052, -36.69689052, -36.69689052,
       -36.69689052, -36.69689052, -36.69689052, -36.69689052])

3. Auto ARIMA usando StatsForecast

Código
# Build the unique_id / ds / y frame passed to StatsForecast from the 'ima1' series
series_length = len(ima1)
data = pd.DataFrame(
    dict(
        unique_id='ima1_series',   # a unique identifier for our single series
        ds=range(series_length),   # integers used as time steps
        y=ima1,                    # the time series values
    )
)

# Initialize StatsForecast with a single AutoARIMA model.
# freq=1 is the step between consecutive integer 'ds' values.
auto_arima_models = [AutoARIMA()]
sf = StatsForecast(models=auto_arima_models, freq=1)

# Fit the model to the data
sf.fit(data)

# Forecast 10 steps ahead and show the result
forecast_df = sf.predict(h=10)
print("\nForecast:")
print(forecast_df)

# Model

Forecast:
     unique_id   ds  AutoARIMA
0  ima1_series  500 -36.696439
1  ima1_series  501 -36.696439
2  ima1_series  502 -36.696439
3  ima1_series  503 -36.696439
4  ima1_series  504 -36.696439
5  ima1_series  505 -36.696439
6  ima1_series  506 -36.696439
7  ima1_series  507 -36.696439
8  ima1_series  508 -36.696439
9  ima1_series  509 -36.696439
Código
# Pull the fitted model object of the first (only) series / first (only) model
# out of StatsForecast and print it (coefficients, residuals, information criteria, ...)
fitted_autoarima_model = sf.fitted_[0][0].model_
print(fitted_autoarima_model)
{'coef': {'ma1': np.float64(0.9092546172913577)}, 'sigma2': np.float64(1.0099914279544253), 'var_coef': array([[1.8825789e-05]]), 'mask': array([ True]), 'loglik': np.float64(-710.4546259869771), 'aic': np.float64(1424.9092519739543), 'arma': (0, 1, 0, 0, 1, 1, 0), 'residuals': array([-1.08562961e-03,  1.50039447e-02,  9.98504972e-01, -1.83051895e+00,
       -4.01135932e-01,  1.39637672e+00, -2.06379156e+00, -7.84313377e-01,
        1.53518707e+00, -1.07046473e+00, -4.96196564e-01, -2.58111314e-01,
        1.62215373e+00, -7.51336945e-01, -3.39027886e-01, -5.24880884e-01,
        2.27989884e+00,  2.10059195e+00,  1.06583294e+00,  3.22762000e-01,
        7.90606906e-01,  1.43458651e+00, -8.95269461e-01,  1.14525656e+00,
       -1.23469414e+00, -6.44129510e-01,  9.17798309e-01, -1.44554158e+00,
       -1.12140977e-01, -8.85585871e-01, -2.26191570e-01, -2.82236728e+00,
       -1.72428477e+00, -7.26633645e-01,  9.58031453e-01, -2.09873476e-01,
        3.73775707e-02,  6.56751525e-01, -8.57191042e-01,  2.71386481e-01,
       -7.96814323e-01, -1.72796204e+00, -3.74689634e-01,  5.62660689e-01,
        3.43418020e-01, -1.93481878e-02,  2.39927708e+00,  3.84512670e-01,
        1.00073162e+00,  2.20907732e+00, -1.28834627e+00, -1.03203224e+00,
        1.74717031e+00, -8.17334496e-01,  5.45890816e-02,  1.04639306e+00,
        9.01653548e-01,  1.73668802e+00,  1.49595068e+00,  1.05527302e+00,
       -7.69765330e-01,  7.99336206e-01,  3.02848635e-01, -1.31878629e+00,
        1.42277138e+00,  7.89144518e-01,  5.44699129e-02, -2.41677932e-01,
       -1.18833703e+00,  2.01553716e-01,  4.64747104e-01, -8.32133069e-01,
        1.17078520e+00, -1.11576105e+00, -2.09607214e+00,  1.03479987e+00,
       -4.08508144e-01, -1.17621116e-01, -8.43995784e-01, -1.59232073e+00,
        1.25769580e+00, -7.02721025e-01,  1.67992270e+00,  7.74687963e-01,
       -2.92569375e-01, -1.10316467e+00, -7.06716580e-01, -1.22915360e+00,
        2.11345611e+00,  1.21173532e-01,  1.18802505e+00, -1.31238432e+00,
        2.33709784e-01,  1.12829185e+00, -3.00839593e-01,  1.00314456e+00,
       -1.06867872e+00, -1.36788160e+00,  3.96028886e-01, -3.97806976e-01,
        6.62503727e-01, -2.00242328e+00,  7.52878112e-01,  2.55478420e+00,
       -9.10177539e-03,  2.02545769e-02,  1.91860833e-01, -1.87483152e+00,
        4.55067718e-01, -1.63565019e+00, -3.85325878e-01,  1.20831725e+00,
       -7.15302396e-01,  4.89945735e-01,  1.01837772e+00,  2.64241358e-01,
       -1.36034437e+00, -3.29429495e-01,  1.95971888e+00, -2.04345900e+00,
       -2.40302671e-01, -5.81819171e-01,  1.52871866e-01,  7.17888794e-01,
        1.62934132e+00, -3.03896642e-01,  8.45451602e-01,  4.62116569e-01,
        5.03931804e-01, -5.95213683e-01, -9.63652217e-01, -1.12142721e+00,
       -7.26813145e-01,  3.01751296e-01,  7.76098552e-01,  3.02652109e-01,
       -5.33020961e-01,  1.79656229e+00,  1.51070616e+00, -3.60637605e-01,
       -8.14120100e-01,  1.29369149e-01,  1.26686261e+00,  3.21433082e-01,
        5.63772670e-01, -2.23799191e-01,  4.68889233e-01,  1.52884856e+00,
       -2.39691384e-01,  1.45546327e-01,  2.50454766e-01,  2.84433033e-01,
       -1.41515810e+00, -1.86082960e+00, -1.01686895e+00,  1.74845521e-01,
        5.46025134e-01, -5.28679885e-01,  1.38035501e+00, -1.58738402e-01,
        3.57912464e-02, -2.08222828e-01,  1.48786877e-01,  6.89813032e-01,
        6.72464420e-01, -9.10776225e-01,  1.54321062e+00, -1.12690044e+00,
        1.18342629e-01, -3.10695843e-01, -1.01344696e+00, -9.77317664e-02,
       -7.19559243e-01,  6.04374111e-02,  4.13749485e-01,  1.45850310e+00,
        3.05969969e-01, -6.12784150e-01, -3.84545799e-01,  1.37170320e-01,
        9.47183785e-02,  1.45758089e+00,  1.38367112e+00, -3.61227640e-01,
       -5.43236562e-01, -2.55689217e+00, -5.25403548e-01, -9.94360476e-01,
       -3.30949540e-01,  3.73159627e-01,  1.90321033e-01, -4.35451891e-02,
        2.12204569e-01, -1.39441857e-01,  2.10301089e-01, -3.24495521e+00,
       -2.26752494e-01, -1.47039110e-01, -3.07331376e-01, -2.45639334e-01,
        7.30507181e-01, -6.29343253e-01,  2.23464056e+00,  6.37071573e-01,
        3.38997170e-02, -2.43162303e-01, -5.14219123e-02, -9.46421516e-01,
       -5.84407684e-02,  2.46138770e-01,  6.06553968e-01,  5.49765238e-01,
       -2.53033326e-01, -1.43340032e+00, -6.40250790e-01,  1.59215167e+00,
        8.99360798e-01,  3.58324110e-01, -7.54444469e-01,  4.46245144e-03,
       -1.25644555e+00, -5.39609774e-01, -2.51303851e-01, -3.53823753e-01,
        9.52841873e-01, -1.42416005e+00, -8.47361494e-01, -1.38310966e+00,
       -1.21697354e+00,  1.16976814e-01, -1.59515193e+00,  7.63871560e-01,
       -2.62887619e-01,  8.56859150e-02,  3.06575923e-01, -4.22603030e-01,
        1.02598360e+00, -1.97991312e-01,  5.99186315e-01, -2.09262077e-01,
        3.01945753e-01,  2.66904049e-01,  2.58982604e-01, -9.84816032e-01,
        4.54447230e-01, -3.39877038e-01,  6.52461507e-01, -2.17830785e+00,
       -1.42172385e+00,  3.37549935e-01,  1.88310975e+00,  7.99467335e-01,
       -6.57817428e-01, -1.70813472e+00,  7.72968431e-01, -1.14990738e-01,
        6.22735177e-01,  8.07076829e-02,  6.00279418e-02,  2.93072340e+00,
       -8.14083890e-03, -1.57360079e-01, -1.21688985e-01, -5.82194253e-01,
        9.94312662e-01, -3.66718108e-01,  6.51003938e-01,  2.65023129e-01,
        1.23415509e+00,  3.95105663e-01, -1.19447479e+00, -1.33245237e+00,
        1.42606861e+00, -6.37838038e-01, -1.28848512e+00, -6.86599875e-01,
        1.28626265e+00, -1.45159050e+00, -8.24821540e-01, -6.96680398e-01,
       -1.21785676e+00, -1.20305634e+00, -1.49011805e+00, -4.72587876e-01,
       -3.40274796e-01, -6.92559674e-01, -1.63826910e+00,  1.25461163e+00,
       -1.34183790e+00,  3.01935069e-01, -1.09907211e+00,  7.00363322e-01,
        9.20508951e-01, -8.54341481e-01, -1.93675402e+00,  7.25445699e-01,
        1.84239428e+00,  4.28709523e-01,  1.44863394e-01,  8.32532879e-01,
        7.45855030e-01, -8.18611602e-01, -6.60914882e-01,  6.18827278e-01,
       -1.56673713e-01,  1.32944988e+00, -7.28205553e-01,  7.78826245e-01,
        3.10035547e-01, -9.99646330e-02,  1.15301027e+00,  6.90011317e-01,
       -1.02029875e+00,  6.63459592e-01,  1.34793375e+00, -1.18568104e-01,
       -2.58247475e-01, -6.58624704e-01, -1.74287652e-02,  6.89804374e-01,
       -8.27295494e-01, -1.33003678e+00, -3.78129630e-01, -1.37408379e+00,
        5.30857988e-01, -4.38478249e-01, -1.74073426e+00, -3.45698507e-01,
       -1.92033785e-01,  4.50389732e-01, -1.50660435e-01,  1.87488796e+00,
       -5.41600578e-01, -3.67791883e-02, -1.25627661e-01, -2.61020406e-01,
        1.25394049e-01,  5.55399206e-01,  1.32220758e+00, -1.56539064e+00,
       -4.99334721e-01, -4.53057611e-01,  9.46800739e-01, -3.73480737e-01,
       -1.87658326e+00,  8.83644269e-02, -3.50775772e-02,  1.81325706e-01,
       -1.04325771e+00,  1.73147498e+00, -3.51080187e-01, -1.60549383e-01,
       -9.23495475e-01, -9.01317192e-01, -1.24111194e+00, -3.65287349e-01,
       -9.79422397e-01,  2.08518802e+00, -1.93363861e+00, -1.06980386e+00,
       -4.44187827e-01,  7.16098125e-01, -4.58730257e-01,  7.20684804e-01,
       -1.03372282e+00,  1.11084882e-01,  1.69639867e+00, -9.40565737e-01,
        4.03811459e-01,  2.17370994e-01, -2.26294580e+00, -5.57758118e-01,
        1.10822256e+00,  1.45457910e-01, -1.22295733e+00,  7.11714809e-01,
       -5.04553376e-01,  3.87607564e-01,  3.48531802e-01, -1.09637165e+00,
        7.79882978e-01, -1.00577577e+00, -1.29803753e+00, -8.77789653e-01,
       -3.42501827e-01,  1.09343494e-01, -7.73541968e-01,  7.52840560e-01,
        2.37277810e-01, -6.84226394e-01,  8.85810532e-01,  1.15573467e+00,
       -1.04580257e-01, -2.08709190e-01, -1.77748630e+00, -3.87719186e-01,
       -3.05732586e-01,  2.61012131e-01, -1.84911451e-01,  2.60574740e-01,
        1.51678575e+00, -5.28377283e-01, -4.87465487e-01, -1.30781012e+00,
        2.22314410e-03,  9.66997158e-01, -1.75118530e+00, -6.49546965e-01,
        2.72728093e-02,  8.57651408e-01,  3.68139331e-01,  3.35314456e-01,
       -2.36281738e-01,  4.94833011e-01,  9.98061515e-01, -1.70361032e+00,
        7.39458353e-01, -1.15433867e-01, -5.13913913e-01, -2.45278717e+00,
       -1.34891148e+00,  1.68224679e+00, -6.89886099e-01, -1.07718033e+00,
       -4.53286662e-01, -7.43777592e-01,  1.30009343e-01,  1.51068485e+00,
       -5.94936818e-01,  1.67284743e-01, -1.16048052e+00,  1.54282529e+00,
        1.54372224e-01, -1.05204368e+00,  8.87244585e-01,  1.20713201e+00,
        1.43854893e-01, -1.62488697e+00,  1.78714752e+00,  2.91802319e-01,
        1.18424196e+00, -5.74261807e-01, -3.72277609e-01,  9.01867123e-02,
        1.13080679e+00,  1.16860242e-01, -1.23439850e+00,  2.53221303e-01,
        1.63508183e+00,  4.56813512e-02,  1.47075199e-01, -1.27982557e+00,
       -1.37559114e+00, -6.49619719e-01, -3.09770842e-01,  9.18554216e-02,
        1.76091670e-01,  1.59115676e+00,  5.36376896e-02,  1.06847107e+00,
       -5.28611653e-02,  1.46157147e+00, -1.89532484e+00, -5.58756640e-01,
        9.52226861e-01, -1.52682619e-01,  9.15920585e-01,  3.90637734e-01,
        9.37247313e-01,  6.12160567e-01,  6.68800067e-01,  1.49062988e+00,
        9.74321768e-01,  9.95726758e-01,  9.47564288e-03, -5.72089099e-01,
       -6.63050622e-01,  7.60222358e-01, -2.45384612e+00, -1.27062179e+00,
       -9.84395330e-01, -2.19483697e-01, -2.65473071e+00,  1.30425323e+00,
        5.47126139e-01,  2.02726314e+00,  1.00504025e+00,  1.07253486e+00,
        2.20281015e+00, -5.48264963e-01, -1.51880823e+00, -2.04486561e-01,
       -8.48183361e-01, -2.28123572e-01, -1.07395740e+00,  5.99183704e-01,
        4.79264361e-01, -1.01135869e-03,  2.88270754e-01, -1.23738889e-01]), 'code': 2, 'n_cond': 0, 'nobs': np.int64(499), 'model': {'phi': array([], dtype=float64), 'theta': array([0.90925462]), 'delta': array([1.]), 'Z': array([1., 0., 1.]), 'a': array([  0.13837263,  -0.11251016, -36.72230181]), 'P': array([[ 0.00000000e+00,  0.00000000e+00,  3.01591344e-22],
       [ 0.00000000e+00,  7.77156117e-16,  3.02189835e-21],
       [ 3.01591344e-22,  3.02189835e-21, -3.01591344e-22]]), 'T': array([[0., 1., 0.],
       [0., 0., 0.],
       [1., 0., 1.]]), 'V': array([[1.        , 0.90925462, 0.        ],
       [0.90925462, 0.82674396, 0.        ],
       [0.        , 0.        , 0.        ]]), 'h': 0.0, 'Pn': array([[ 1.00000000e+00,  9.09254617e-01, -3.02189835e-21],
       [ 9.09254617e-01,  8.26743959e-01,  0.00000000e+00],
       [-3.02189835e-21,  0.00000000e+00, -3.01591344e-22]])}, 'bic': np.float64(1433.3344641654573), 'aicc': np.float64(1424.9334455223413), 'ic': None, 'xreg': None, 'x': array([-1.08563060e+00, -1.06535270e+00,  1.15236700e-01, -1.13637737e+00,
       -3.07064286e+00, -1.93994655e+00, -2.88033291e+00, -5.49325686e+00,
       -4.61334196e+00, -4.34073973e+00, -5.79969225e+00, -6.50539875e+00,
       -5.09924720e+00, -4.39589853e+00, -5.41489229e+00, -6.24882733e+00,
       -4.43381339e+00, -2.61690230e-01,  2.71047115e+00,  4.00030606e+00,
        5.08524239e+00,  7.23960614e+00,  7.64543109e+00,  7.97900966e+00,
        7.78337513e+00,  6.01713103e+00,  6.35025987e+00,  5.73797385e+00,
        4.31209250e+00,  3.32427575e+00,  2.29307697e+00, -7.35569565e-01,
       -5.02583286e+00, -7.32008989e+00, -7.02251697e+00, -6.36143647e+00,
       -6.51486266e+00, -5.82407863e+00, -6.08421453e+00, -6.59216992e+00,
       -7.14227184e+00, -9.59477120e+00, -1.15405735e+01, -1.13185775e+01,
       -1.04635632e+01, -1.01706635e+01, -7.78894570e+00, -5.22290480e+00,
       -3.87254785e+00, -7.53542102e-01, -3.32984202e-02, -2.23676342e+00,
       -1.42796059e+00, -6.56682318e-01, -1.34525555e+00, -2.49224673e-01,
        1.60386609e+00,  4.16038803e+00,  7.23542973e+00,  9.65090212e+00,
        9.84064681e+00,  9.94007163e+00,  1.09697200e+01,  9.92629936e+00,
        1.01499595e+01,  1.22327652e+01,  1.30047681e+01,  1.28126171e+01,
        1.14045331e+01,  1.05255862e+01,  1.11735970e+01,  1.07640372e+01,
        1.11782018e+01,  1.11269824e+01,  8.01639927e+00,  7.14533604e+00,
        7.67772439e+00,  7.18866537e+00,  6.23772202e+00,  3.87799421e+00,
        3.68786510e+00,  4.12870975e+00,  5.16968015e+00,  7.47184558e+00,
        7.88366480e+00,  6.51448007e+00,  4.80470592e+00,  2.93296700e+00,
        3.92880954e+00,  5.97165279e+00,  7.26985544e+00,  7.03768838e+00,
        6.07810667e+00,  7.41890022e+00,  8.14396521e+00,  8.87356998e+00,
        8.71700508e+00,  6.37742242e+00,  5.52969864e+00,  5.49198275e+00,
        5.79277865e+00,  4.39273994e+00,  3.32490544e+00,  6.56424754e+00,
        8.87809509e+00,  8.89007384e+00,  9.10035124e+00,  7.39997006e+00,
        6.15033856e+00,  4.92846079e+00,  3.05591243e+00,  3.91387034e+00,
        4.29723598e+00,  4.13678971e+00,  5.60065285e+00,  6.79085885e+00,
        5.67077715e+00,  4.10444826e+00,  5.76463185e+00,  5.50305630e+00,
        3.40472910e+00,  2.60441361e+00,  2.22826371e+00,  3.08515195e+00,
        5.36723698e+00,  6.54482646e+00,  7.11395863e+00,  8.34480598e+00,
        9.26891940e+00,  9.13190804e+00,  7.62705503e+00,  5.62942260e+00,
        3.88294659e+00,  3.52383968e+00,  4.57430699e+00,  5.58263029e+00,
        5.32479716e+00,  6.63670768e+00,  9.78094640e+00,  1.07939254e+01,
        9.65189384e+00,  9.04102053e+00,  1.04255126e+01,  1.18988464e+01,
        1.27548836e+01,  1.30436973e+01,  1.33090961e+01,  1.52642843e+01,
        1.64147056e+01,  1.63423114e+01,  1.67251048e+01,  1.72372650e+01,
        1.60807290e+01,  1.29331603e+01,  1.02243234e+01,  9.47457618e+00,
        1.01795804e+01,  1.01473764e+01,  1.10470268e+01,  1.21433825e+01,
        1.20348402e+01,  1.18591607e+01,  1.18186200e+01,  1.26437182e+01,
        1.39433983e+01,  1.36440634e+01,  1.43591466e+01,  1.46354175e+01,
        1.37291207e+01,  1.35260285e+01,  1.22300799e+01,  1.12108668e+01,
        1.04024445e+01,  9.80861931e+00,  1.02773218e+01,  1.21120285e+01,
        1.37441492e+01,  1.34095696e+01,  1.24678470e+01,  1.22553673e+01,
        1.24748084e+01,  1.40185124e+01,  1.67274957e+01,  1.76243774e+01,
        1.67526929e+01,  1.37018604e+01,  1.08515909e+01,  9.37950479e+00,
        8.14442840e+00,  8.21667063e+00,  8.74628878e+00,  8.87579387e+00,
        9.04840477e+00,  9.10191090e+00,  9.18542384e+00,  6.13168587e+00,
        2.95444287e+00,  2.60122801e+00,  2.16020064e+00,  1.63511884e+00,
        2.14227732e+00,  2.17715109e+00,  3.83955839e+00,  6.50848721e+00,
        7.12164720e+00,  6.90930837e+00,  6.63679001e+00,  5.64361288e+00,
        4.72463398e+00,  4.91763521e+00,  5.74799199e+00,  6.84926923e+00,
        7.09611248e+00,  5.43264044e+00,  3.48906378e+00,  4.49906446e+00,
        6.84609651e+00,  8.02216858e+00,  7.59353197e+00,  6.91201230e+00,
        5.65962425e+00,  3.97758556e+00,  3.23563903e+00,  2.65331609e+00,
        3.28444208e+00,  2.72665791e+00,  5.84372314e-01, -1.56920470e+00,
       -4.04377709e+00, -5.03333908e+00, -6.52212931e+00, -7.20865701e+00,
       -6.77699089e+00, -6.93033675e+00, -6.54585052e+00, -6.68969797e+00,
       -6.04796812e+00, -5.31307911e+00, -4.89391731e+00, -4.55836646e+00,
       -4.44669322e+00, -3.90524360e+00, -3.40357725e+00, -4.15291216e+00,
       -4.59391345e+00, -4.52058225e+00, -4.17715551e+00, -5.76220972e+00,
       -9.16457005e+00, -1.01197291e+01, -7.92970049e+00, -5.41800692e+00,
       -5.34890498e+00, -7.65516324e+00, -8.43532419e+00, -7.84748981e+00,
       -7.32931050e+00, -6.68237798e+00, -6.54896620e+00, -3.56366212e+00,
       -9.07029183e-01, -1.07179136e+00, -1.33656072e+00, -2.02940125e+00,
       -1.56445140e+00, -1.02708612e+00, -7.09522319e-01,  1.47429146e-01,
        1.62255773e+00,  3.13982461e+00,  2.30460146e+00, -1.13932624e-01,
        1.00597515e-01,  7.59418944e-01, -1.10902336e+00, -2.96718428e+00,
       -2.30521573e+00, -2.58726597e+00, -4.73195288e+00, -6.17860607e+00,
       -8.02992270e+00, -1.03403209e+01, -1.29243235e+01, -1.47518081e+01,
       -1.55217856e+01, -1.65237417e+01, -1.87917239e+01, -1.90267160e+01,
       -1.92277925e+01, -2.01459297e+01, -2.09704660e+01, -2.12694390e+01,
       -1.97121215e+01, -1.97294860e+01, -2.24430539e+01, -2.34786107e+01,
       -2.09766016e+01, -1.88726866e+01, -1.83380171e+01, -1.73737665e+01,
       -1.58709271e+01, -1.60113666e+01, -1.74166078e+01, -1.73987205e+01,
       -1.69927226e+01, -1.58057290e+01, -1.53251261e+01, -1.52084241e+01,
       -1.41902372e+01, -1.40083006e+01, -1.29461837e+01, -1.12077924e+01,
       -1.16006952e+01, -1.18649470e+01, -9.91375952e+00, -8.80671264e+00,
       -9.17276871e+00, -1.00662061e+01, -1.06824924e+01, -1.00085353e+01,
       -1.02086229e+01, -1.22908820e+01, -1.38783537e+01, -1.55962536e+01,
       -1.63147876e+01, -1.62705808e+01, -1.84100034e+01, -2.03384726e+01,
       -2.08448344e+01, -2.05690522e+01, -2.03101937e+01, -1.85722944e+01,
       -1.74091445e+01, -1.79383765e+01, -1.80974458e+01, -1.84726937e+01,
       -1.85846337e+01, -1.79152194e+01, -1.60880125e+01, -1.64511798e+01,
       -1.83738532e+01, -1.92809332e+01, -1.87460772e+01, -1.82586750e+01,
       -2.04748473e+01, -2.20927749e+01, -2.20475067e+01, -2.18980755e+01,
       -2.27764619e+01, -2.19935738e+01, -2.07703024e+01, -2.12500731e+01,
       -2.23195488e+01, -2.40605585e+01, -2.61211973e+01, -2.76149714e+01,
       -2.89265330e+01, -2.77318893e+01, -2.77695611e+01, -3.05975348e+01,
       -3.20144467e+01, -3.17022284e+01, -3.15098432e+01, -3.12062610e+01,
       -3.15846978e+01, -3.24135302e+01, -3.06161271e+01, -3.00142345e+01,
       -3.04656368e+01, -2.98810983e+01, -3.19463985e+01, -3.45617506e+01,
       -3.39606722e+01, -3.28075578e+01, -3.38982568e+01, -3.42985216e+01,
       -3.41559450e+01, -3.42271050e+01, -3.35261392e+01, -3.43056067e+01,
       -3.45226047e+01, -3.48192683e+01, -3.70318120e+01, -3.90898483e+01,
       -4.02304844e+01, -4.04325623e+01, -4.11066832e+01, -4.10571892e+01,
       -4.01353877e+01, -4.06038681e+01, -4.03401936e+01, -3.83790316e+01,
       -3.74327548e+01, -3.77365541e+01, -3.97038102e+01, -4.17077170e+01,
       -4.23659850e+01, -4.23829617e+01, -4.23305466e+01, -4.22381035e+01,
       -4.04843889e+01, -3.96336218e+01, -4.06015167e+01, -4.23525571e+01,
       -4.35394664e+01, -4.25704478e+01, -4.34423865e+01, -4.56842067e+01,
       -4.62475375e+01, -4.53650882e+01, -4.42171253e+01, -4.35470785e+01,
       -4.34784740e+01, -4.31984813e+01, -4.17504906e+01, -4.25466088e+01,
       -4.33561660e+01, -4.27992440e+01, -4.34181167e+01, -4.63381824e+01,
       -4.99173020e+01, -4.94615592e+01, -4.86218546e+01, -5.03263171e+01,
       -5.17590349e+01, -5.29149655e+01, -5.34612394e+01, -5.18323429e+01,
       -5.10536825e+01, -5.14273469e+01, -5.24357230e+01, -5.19480699e+01,
       -5.03908767e+01, -5.13025567e+01, -5.13718877e+01, -4.93580245e+01,
       -4.81165792e+01, -4.96106655e+01, -4.93009539e+01, -4.73841795e+01,
       -4.59346149e+01, -4.54320992e+01, -4.63265270e+01, -4.65748355e+01,
       -4.53620260e+01, -4.42169745e+01, -4.53451172e+01, -4.62142785e+01,
       -4.43489540e+01, -4.28165669e+01, -4.26279558e+01, -4.37740525e+01,
       -4.63133310e+01, -4.82137133e+01, -4.91141539e+01, -4.93039590e+01,
       -4.90443474e+01, -4.72930785e+01, -4.57926741e+01, -4.46754328e+01,
       -4.37567817e+01, -4.23432745e+01, -4.29096587e+01, -4.51917482e+01,
       -4.47475734e+01, -4.40344393e+01, -4.32573461e+01, -4.20339034e+01,
       -4.07414669e+01, -3.92771099e+01, -3.80517000e+01, -3.59529606e+01,
       -3.36232767e+01, -3.17416434e+01, -3.08267986e+01, -3.13902719e+01,
       -3.25734972e+01, -3.24161567e+01, -3.41787671e+01, -3.76805598e+01,
       -3.98202738e+01, -4.09348235e+01, -4.37891208e+01, -4.48986937e+01,
       -4.31656693e+01, -4.06409292e+01, -3.77925906e+01, -3.58062182e+01,
       -3.26282008e+01, -3.11735505e+01, -3.31908712e+01, -3.47763411e+01,
       -3.58104548e+01, -3.68097930e+01, -3.80911728e+01, -3.84684899e+01,
       -3.74444150e+01, -3.70096530e+01, -3.67223018e+01, -3.65839292e+01]), 'lambda': None}
Código
# Human-readable description of the orders selected by AutoARIMA
arima_string(fitted_autoarima_model)
'ARIMA(0,1,1)                   '

4. Modelagem sazonal: Prophet

Para os tópicos a seguir (inclusive ARIMA), vamos usar novamente a clássica série Airline e comparar dois modelos, Prophet e SARIMA, de acordo com métricas preditivas.

Código
# Load the airline passengers dataset, parsing 'Month' as dates and using it as the index
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv"
airline = pd.read_csv(
    url,
    parse_dates=['Month'],
    index_col=['Month'],
)
airline.head()
Passengers
Month
1949-01-01 112
1949-02-01 118
1949-03-01 132
1949-04-01 129
1949-05-01 121
Código
# Reshape to the two-column layout used with Prophet: 'ds' (date) and 'y' (value)
df = airline.reset_index()
df.columns = ['ds', 'y']
df.head()
ds y
0 1949-01-01 112
1 1949-02-01 118
2 1949-03-01 132
3 1949-04-01 129
4 1949-05-01 121
Código
# Hold out the last 12 observations as the test set; 'idx' is the first test position
idx = len(df) - 12
df_train, df_test = df.iloc[:idx], df.iloc[idx:]
df_train
ds y
0 1949-01-01 112
1 1949-02-01 118
2 1949-03-01 132
3 1949-04-01 129
4 1949-05-01 121
... ... ...
127 1959-08-01 559
128 1959-09-01 463
129 1959-10-01 407
130 1959-11-01 362
131 1959-12-01 405

132 rows × 2 columns

Código
## Prophet using default values

# Fit on the training window only
m = Prophet()
m.fit(df_train)

# Extend the training dates by 12 month-start periods and predict over the whole range
future = m.make_future_dataframe(periods=12, freq='MS')
forecast = m.predict(future)

# Show the last few point forecasts with their uncertainty bounds
summary_cols = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
forecast[summary_cols].tail()
ds yhat yhat_lower yhat_upper
139 1960-08-01 527.280590 500.638342 551.991252
140 1960-09-01 486.059339 459.498518 511.913797
141 1960-10-01 452.372785 425.321933 478.266611
142 1960-11-01 422.546610 394.918101 447.231719
143 1960-12-01 447.861729 422.664873 473.433933
Código
# Plot the default Prophet forecast
fig1 = m.plot(forecast)

Código
# Plot the forecast components of the default Prophet model
fig2 = m.plot_components(forecast)

Código
# RMSE of Prophet with default values on the 12 held-out months

y_true = df_test['y']
# Rows from position 'idx' onward are the forecasts for the held-out period
y_pred = forecast['yhat'].iloc[idx:]
np.sqrt(mean_squared_error(y_true, y_pred))
np.float64(43.06779943855511)
Código
## Improving the fit with multiplicative seasonality

# Same training data, seasonality modelled multiplicatively
m2 = Prophet(seasonality_mode='multiplicative')
m2.fit(df_train)

# Future frame: the monthly (month-start) frequency must be given explicitly here
future = m2.make_future_dataframe(periods=12, freq='MS')

# Predict over history + horizon and show the last few rows
forecast2 = m2.predict(future)
summary_cols = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
forecast2[summary_cols].tail()
ds yhat yhat_lower yhat_upper
139 1960-08-01 573.161035 560.506141 584.914388
140 1960-09-01 496.542106 483.518880 508.761994
141 1960-10-01 433.293194 421.550570 446.541725
142 1960-11-01 379.728025 366.920953 392.332337
143 1960-12-01 420.369765 407.916878 433.798482
Código
# Plot the multiplicative-seasonality Prophet forecast
fig3 = m2.plot(forecast2)

Código
# Plot the forecast components of the multiplicative-seasonality Prophet model
fig4 = m2.plot_components(forecast2)

Código
# RMSE of the multiplicative-seasonality model: better than additive seasonality.
# numpy and mean_squared_error are already imported at the top of the file,
# so they are not re-imported here.

y_true = df_test['y']
# Rows from position 'idx' onward are the forecasts for the 12 held-out months
y_pred = forecast2.iloc[idx:]['yhat']
np.sqrt(mean_squared_error(y_true, y_pred))
np.float64(25.84359362409569)

5. Modelagem SARIMA

Código
## Is a (non-seasonal) ARIMA enough?

# Training series indexed by date, with month-start frequency
train_series = df_train.set_index('ds')
model = ARIMA(endog=train_series, order=(2, 1, 0), freq='MS')
model_fit = model.fit()

# Estimation summary
print(model_fit.summary())

# Diagnostic plots of the fitted model
model_fit.plot_diagnostics(figsize=(15, 12))
plt.show()
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  132
Model:                 ARIMA(2, 1, 0)   Log Likelihood                -628.265
Date:                Mon, 13 Oct 2025   AIC                           1262.530
Time:                        01:58:06   BIC                           1271.156
Sample:                    01-01-1949   HQIC                          1266.035
                         - 12-01-1959                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.3589      0.096      3.754      0.000       0.172       0.546
ar.L2         -0.2182      0.074     -2.952      0.003      -0.363      -0.073
sigma2       856.2091    101.066      8.472      0.000     658.124    1054.294
===================================================================================
Ljung-Box (L1) (Q):                   0.22   Jarque-Bera (JB):                 1.12
Prob(Q):                              0.64   Prob(JB):                         0.57
Heteroskedasticity (H):               7.29   Skew:                            -0.04
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.44
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

Código
## SARIMA is more appropriate: use AutoARIMA to detect the orders

models = [AutoARIMA(season_length=12)]
sf = StatsForecast(models=models, freq='MS')

# Build the StatsForecast input in its own frame instead of overwriting `df`
# (the full dataset), so the train/test split cell stays safe to re-run.
# assign() appends 'unique_id' after 'ds' and 'y', so no column renaming is needed.
df_sf = df_train.assign(unique_id="1")
sf.fit(df_sf)
StatsForecast(models=[AutoARIMA])
Código
# Human-readable description of the seasonal orders selected by AutoARIMA
arima_string(sf.fitted_[0,0].model_)
'ARIMA(1,1,0)(0,1,0)[12]                   '
Código
## Summary of the auto-selected model

# Refit ARIMA(1,1,0)(0,1,0)[12] with statsmodels to get its full summary and diagnostics
train_series = df_train.set_index('ds')
model = ARIMA(
    endog=train_series,
    order=(1, 1, 0),
    seasonal_order=(0, 1, 0, 12),
    freq='MS',
)
model_fit = model.fit()

# Estimation summary
print(model_fit.summary())

# Diagnostic plots of the fitted model
model_fit.plot_diagnostics(figsize=(15, 12))
plt.show()
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                  132
Model:             ARIMA(1, 1, 0)x(0, 1, 0, 12)   Log Likelihood                -447.951
Date:                          Mon, 13 Oct 2025   AIC                            899.902
Time:                                  02:04:21   BIC                            905.460
Sample:                              01-01-1949   HQIC                           902.159
                                   - 12-01-1959                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.2431      0.090     -2.697      0.007      -0.420      -0.066
sigma2       108.8757     13.306      8.183      0.000      82.797     134.954
===================================================================================
Ljung-Box (L1) (Q):                   0.02   Jarque-Bera (JB):                 0.57
Prob(Q):                              0.89   Prob(JB):                         0.75
Heteroskedasticity (H):               1.47   Skew:                            -0.03
Prob(H) (two-sided):                  0.23   Kurtosis:                         3.33
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

Código
# Forecast 12 steps (months) ahead with the fitted seasonal model and show the point forecasts
forecast_results_sarima = model_fit.get_forecast(steps=12)
forecast_results_sarima.predicted_mean
predicted_mean
1960-01-01 424.109830
1960-02-01 407.055669
1960-03-01 470.825702
1960-04-01 460.881615
1960-05-01 484.868020
1960-06-01 536.871326
1960-07-01 612.870522
1960-08-01 623.870717
1960-09-01 527.870670
1960-10-01 471.870681
1960-11-01 426.870679
1960-12-01 469.870679

Código
# RMSE of the SARIMA forecast on the held-out months: best model

sarima_mean = forecast_results_sarima.predicted_mean
y_true = df_test['y']
y_pred = sarima_mean.values
np.sqrt(mean_squared_error(y_true, y_pred))
np.float64(23.93167319781741)
De volta ao topo