import pandas as pd
import numpy as np
from macroframe_forecast import MFF
import macroframe_forecast
from string import ascii_uppercase, ascii_lowercase
from sktime.datasets import load_macroeconomic
import matplotlib.pyplot as plt
#%% Reading the data and generating forecasts.
# Reading GDP data as a pandas dataframe.
# This dataframe has two columns: year and GDP. The series runs from 1950 to
# 2030; the final years are WEO forecasts.
from pandas import DataFrame
# Annual US nominal GDP in current US dollars: one observation per year from
# 1950 through 2030 (81 values, listed in chronological order).
GDP_data_true = DataFrame({
    "year": list(range(1950, 2031)),
    "GDP": [
        # 1950-1959
        301782704906.154, 348993057004.926, 368027835977.609, 389147698401.843,
        390276672099.46, 424868331217.657, 448388356231.708, 471707274214.225,
        478166880805.205, 519476064642.104,
        # 1960-1969
        539899866168.654, 558583293630.287, 600454646133.34, 633368190949.311,
        680153540812.135, 737201978910.734, 808045440847.441, 853883822469.0601,
        933096436159.1281, 1008751520510.61,
        # 1970-1979
        1064366709379.28, 1155403629216.3, 1269884411457.22, 1418456050381.57,
        1536647924378.57, 1674009506825.93, 1867242215504.46, 2079644632633.34,
        2350400768409.49, 2627325000000.0,
        # 1980-1989
        2857325000000.0, 3207025000000.0, 3343800000000.0, 3634025000000.0,
        4037650000000.0, 4339000000000.0, 4579625000000.0, 4855250000000.0,
        5236425000000.0, 5641600000000.0,
        # 1990-1999
        5963125000000.0, 6158125000000.0, 6520325000000.0, 6858550000000.0,
        7287250000000.0, 7639750000000.0, 8073125000000.0, 8577550000000.0,
        9062825000000.0, 9631175000000.0,
        # 2000-2009
        10250950000000.0, 10581925000000.0, 10929100000000.0, 11456450000000.0,
        12217175000000.0, 13039200000000.0, 13815600000000.0, 14474250000000.0,
        14769850000000.0, 14478050000000.0,
        # 2010-2019
        15048975000000.0, 15599725000000.0, 16253950000000.0, 16880675000000.0,
        17608125000000.0, 18295000000000.0, 18804900000000.0, 19612100000000.0,
        20656525000000.0, 21539975000000.0,
        # 2020-2029
        21354125000000.0, 23681175000000.0, 26006900000000.0, 27720725000000.0,
        29184900000000.0, 30507217002511.25, 31717641479090.75, 32941710359665.25,
        34342131994149.0, 35712823521822.0,
        # 2030
        37153089058192.75,
    ],
})

# Percentage growth between the last two years of the series (2029 -> 2030).
# A ratio of levels, so it is unaffected by the rescaling done below.
final_year_growth = (
    GDP_data_true["GDP"].iloc[-1] / GDP_data_true["GDP"].iloc[-2] - 1
) * 100

# Rescale from dollars to trillions of dollars (divide by 1e12); the original
# author notes this is done to avoid matrix-invertibility problems downstream.
# The time identifier must be the index, so 'year' moves from a column to the
# index, leaving 'GDP' as the only column.
GDP_data_true = (
    GDP_data_true
    .assign(GDP=GDP_data_true["GDP"] / 1e12)
    .set_index("year")
)

# Working copy handed to the forecaster: the last six years (2025-2030) are
# blanked out so that they are the values to be forecast.
GDP_data = GDP_data_true.copy()
GDP_data.loc[GDP_data.index[-6:], "GDP"] = np.nan
# Impose the WEO view that US GDP grows by 4% in the final year of the sample,
# i.e. from 2029 to 2030 (compare final_year_growth). The dataframe holds GDP
# in levels, so the growth assumption is restated in levels:
#     GDP_2030 / GDP_2029 - 1 = 0.04
#     GDP_2030                = 1.04 * GDP_2029
#     GDP_2030 - 1.04 * GDP_2029 = 0
# Constraints are always supplied as a list, even when there is only one.
GDP_constraint = ['GDP_2030 - 1.04*GDP_2029']

m = MFF(
    df=GDP_data,
    equality_constraints=GDP_constraint,
    parallelize=False,
)

# fit() produces both the first-step and the second-step forecasts.
m.fit()

# df1: first-step forecasts.
firststep_GDP = m.df1

# df2: the dataframe with the final (reconciled) forecasts filled in.
reconciled_GDP = m.df2

# df1_model: dataframe holding the models behind the first-step forecasts.
models_used = m.df1_model

# Interactive inspection of the bottom-left entry of the model table; as a
# bare expression it has no effect when the file is run as a plain script.
models_used.iloc[-1, 0]
#%% Plotting first and second step forecasts
# GDP in levels: first-step forecasts, reconciled forecasts and the known
# history drawn on one set of axes. The draw order is kept because it
# determines the default colours and the legend order.
fig, ax = plt.subplots(figsize=(8, 4.8))

firststep_GDP['GDP'].plot(ax=ax, label='First step forecasts', linestyle='--')
reconciled_GDP['GDP'].plot(ax=ax, label='Final forecasts', linestyle='-.')
GDP_data['GDP'].plot(ax=ax, label='Known values', color='red')

ax.set(
    xlabel='Year',
    ylabel='US Nominal GDP (in US$ trn)',
    title='US GDP in levels',
)
ax.legend(loc='lower left')

# Zoom in on the end of the sample, where the forecasts are.
ax.set_xlim(2020, 2030)
ax.set_ylim(15, 40)
plt.xticks(np.arange(2019, 2031, 2))

plt.show()
# %%
# Year-over-year growth in percent, stored as a 'GDP_growth' column on each
# frame. The first observation has no predecessor, so its growth is NaN.
# NOTE(review): firststep_GDP and reconciled_GDP were bound to m.df1 / m.df2
# without copying, so those frames presumably gain this column too.
for frame in (firststep_GDP, reconciled_GDP, GDP_data):
    frame['GDP_growth'] = (frame['GDP'] / frame['GDP'].shift(1) - 1) * 100

fig, ax = plt.subplots(figsize=(8, 4.8))

firststep_GDP['GDP_growth'].plot(ax=ax, label='First-step forecasts', linestyle='--')
reconciled_GDP['GDP_growth'].plot(ax=ax, label='Second-step forecasts', linestyle='-.')
GDP_data['GDP_growth'].plot(ax=ax, label='Known values', color='red')

ax.set(
    xlabel='Year',
    ylabel='Nominal GDP growth (annual, %)',
    title='US GDP growth rates',
)
ax.legend(loc='upper left')

# Mark the imposed constraint with a downward triangle at (2030, 4), i.e. 4%
# growth in 2030; the '_nolegend_' label keeps the marker out of the legend.
ax.plot(2030, 4, marker='v', color='black', markersize=8, label='_nolegend_')

# Label the marker, with an arrow pointing from the text to it.
ax.annotate(
    '2030 growth constraint',
    xy=(2030, 4),
    xytext=(2028, 2.5),
    arrowprops=dict(arrowstyle='->', color='black'),
    color='black',
)

ax.set_xlim(2019, 2031)
plt.xticks(np.arange(2020, 2031, 2))

plt.show()
# %% Looking at externally generated first-stage
# Reconcile first-step forecasts that were produced outside of MFF: a
# hand-specified GDP path for 2025-2030 is passed straight to the
# reconciliation step, reusing the matrices stored on the fitted object `m`.
# Values are presumably in US$ trn, like GDP_data -- TODO confirm.
GDP_forecasts_external = pd.DataFrame({"GDP": [29.0, 31.5, 33, 34.1,36.8, 39]},
index = [2025, 2026, 2027, 2028, 2029, 2030])
# Build a two-level (variable name, year) MultiIndex pairing the single column
# name 'GDP' with every forecast year.
multi_index = pd.MultiIndex.from_product([[GDP_forecasts_external.columns[0]], GDP_forecasts_external.index],
names=[None, 'year'])
# Flatten the (6 x 1) value array to 1D so it can be indexed by multi_index.
GDP_multiindex_series = pd.Series(GDP_forecasts_external.values.ravel(), index=multi_index)
# NOTE(review): W_alt, smoothness_alt and Phi_alt are built here, but the
# Reconciliation call below is given m.W and m.Phi from the fitted model
# instead. Confirm which pair is intended.
W_alt = pd.DataFrame(np.eye(len(multi_index)), index=multi_index, columns=multi_index) # identity matrix with one row/column per (variable, year) entry
# NOTE(review): a single value (100) is paired with an index built from a list
# wrapping the six-entry MultiIndex -- confirm that pandas accepts this and
# that it is the index GenSmoothingMatrix expects.
smoothness_alt = pd.Series(np.ones(1) * 100, index=[multi_index])
Phi_alt = macroframe_forecast.utils.GenSmoothingMatrix(W_alt, smoothness_alt)
# Second step: reconcile the external forecasts using the W, Phi and the
# equality (C, d) and inequality (C_ineq, d_ineq) constraint objects held by m.
final_forecasts = macroframe_forecast.utils.Reconciliation(y1 = GDP_multiindex_series,
W = m.W, Phi = m.Phi,
C = m.C, d = m.d,
C_ineq = m.C_ineq,
d_ineq = m.d_ineq)
# %%
# Plot the externally supplied first-step forecasts against their reconciled
# (second-step) counterparts, both in levels.

# Alias of GDP_data; not used by the plot below.
gdp_to_forecast_series = GDP_data

# Select the 'GDP' entries and drop the variable level so that both objects
# are indexed by year only.
gdp_series = GDP_multiindex_series.xs('GDP', level=0)
second_stage_series = final_forecasts.xs('GDP', level=0)

fig, ax = plt.subplots(figsize=(8, 4.8))
gdp_series.plot(ax=ax, label='Externally generated first-step forecasts', linestyle='--')
# The reconciled output is two-dimensional; its first column is plotted.
second_stage_series.iloc[:, 0].plot(ax=ax, label='Second-step forecasts', linestyle='-.')

# Labels and formatting.
ax.set_xlabel('Year')
ax.set_ylabel('US Nominal GDP (in US$ trn)')
ax.set_title('US GDP in levels')
ax.legend(loc='upper left')
ax.set_xlim([2024, 2030])
ax.set_ylim([15, 40])

# Display the figure, as the two earlier plotting cells do. Without this call
# the figure is never shown when the file is run as a plain script.
plt.show()