Aggregation of ML and Parametrization (2019)

Author

Installing and importing packages

Code
import sys 
import netCDF4
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde as kde
import pandas as pd
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import seaborn as sns

Aggregation 1:

with, for example, \[K_h(x)=\exp(-\|x/h\|^2).\]

Aggregation 2:

with, for example, \[K_{a,b}(x)=\exp(-\|(x/a,y/b)\|^2).\]

Aggregation 3:

Report result

The following result are:

  • The first graphic: time series of true gravity wave momentum fluxes and the three aggregation methods.
  • The second graphic: PDF of the shown time series (logarithmic scale on the y axis).
  • The barplot of correlations of the 3 ML methods and the 11 parametrizations.
Code
def av_resolution(resulution, y):
    a = len(y) // resulution
    r = len(y) % a
    if r < resulution/2:
        group = np.concatenate((np.repeat(list(range(0,a)), resulution), np.repeat(a-1, r)))
    else:
        group = np.concatenate((np.repeat(list(range(0,a)), resulution), np.repeat(a, r)))
    return y.groupby(by = group).mean()
Code
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# ml_coef = {
#     '1' : [0.56, 0.57, 0.58],
#     '2' : [0.70, 0.67, 0.74],
#     '3' : [0.45, 0.48, 0.49],
#     '4' : [0.44, 0.43, 0.47],
#     '5' : [0.51, 0.56, 0.55],
#     '6' : [0.72, 0.74, 0.75],
#     '7' : [0.51, 0.53, 0.48],
#     '8' : [0.74, 0.76, 0.72]
# }
index_bal_day = [0, 107, 210, 311, 377, 457, 515, 598, 675]
path0 = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/models/output_ml_param_both_v2/output/"
path = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/models/output_ml_param/output/"
ml_pred = xr.open_dataset(path + 'pred_ml_2019_abs5.nc')
rf_pred0 = av_resolution(8, pd.DataFrame(ml_pred['rf'].values)).values.reshape(-1)
extra_pred0 = av_resolution(8, pd.DataFrame(ml_pred['extra'].values)).values.reshape(-1)
boost_pred0 = av_resolution(8, pd.DataFrame(ml_pred['boost'].values)).values.reshape(-1)
fig = make_subplots(
    rows=1, cols=3,
    specs=[[{}, {}, {}]],
    subplot_titles=("ML","ML + Parametrization", "Parametrization"))
sns.set()
first = True
for i in [0,1,2,3,4,5,6,7]:
    df_ml = xr.open_dataset(path + 'pred_bal{}_u1h_abs_ml_v3.nc'.format(i+1))
    df_ml_param = xr.open_dataset(path + 'pred_bal{}_u1h_abs_ml_plus_param_v3.nc'.format(i+1))
    df_param = xr.open_dataset(path + 'pred_bal{}_u1h_abs_param_v3.nc'.format(i+1))
    all_pred = {'ml' : df_ml, 'param' : df_param, 'ml_param' : df_ml_param}
    k = 1
    show = True
    fig.data = []
    f, axs = plt.subplots(nrows=1, ncols=3, figsize=(15, 4))
    for j in ['ml', 'ml_param', 'param']:
        pred1 = av_resolution(8, pd.DataFrame(all_pred[j].variables['agg1'][:].values)).values.reshape(-1)
        pred2 = av_resolution(8, pd.DataFrame(all_pred[j].variables['agg2'][:].values)).values.reshape(-1)
        pred3 = av_resolution(8, pd.DataFrame(all_pred[j].variables['agg3'][:].values)).values.reshape(-1)
        y_test1 = av_resolution(8, pd.DataFrame(all_pred[j].variables['y_test'][:].values)).values.reshape(-1)

        rmae1 = np.round(np.mean(np.abs(y_test1 - pred1))/np.average(np.abs(y_test1)), 3)
        coef1 = np.round(np.corrcoef(y_test1, pred1)[0,1], 3)

        rmae2 = np.round(np.mean(np.abs(y_test1 - pred2))/np.average(np.abs(y_test1)), 3)
        coef2 = np.round(np.corrcoef(y_test1, pred2)[0,1], 3)

        rmae3 = np.round(np.mean(np.abs(y_test1 - pred3))/np.average(np.abs(y_test1)), 3)
        coef3 = np.round(np.corrcoef(y_test1, pred3)[0,1], 3)
        if j == 'ml':
            pred_rf = rf_pred0[index_bal_day[i]:index_bal_day[i+1]]
            pred_ex = extra_pred0[index_bal_day[i]:index_bal_day[i+1]]
            pred_boost = boost_pred0[index_bal_day[i]:index_bal_day[i+1]]
            coef_rf = np.round(np.corrcoef(y_test1, pred_rf)[0,1], 3)
            coef_ex = np.round(np.corrcoef(y_test1, pred_ex)[0,1], 3)
            coef_boost = np.round(np.corrcoef(y_test1, pred_boost)[0,1], 3)
        aggregations = pd.DataFrame(
            {
                'True GWMF' : y_test1,
                'Agg 1 ({})'.format(coef1) : pred1,
                'Agg 2 ({})'.format(coef2) : pred2,
                'Agg 3 ({})'.format(coef3) : pred3,
                # 'RF ({})'.format(coef_rf) : pred_rf,
                # 'Extra-trees ({})'.format(coef_ex) : pred_ex,
                # 'Adaboost ({})'.format(coef_boost) : pred_boost
            })
        fig.add_trace(go.Scatter(
            x=list(range(1, len(y_test1)+1)), 
            y=y_test1, 
            name = "True",
            mode="lines",
            showlegend=show,
            line = dict(color = "#2474B4")),
                        row=1, col=k)
        fig.add_trace(go.Scatter(
            x=list(range(1, len(y_test1)+1)), 
            y=pred1, 
            name = "Agg1",
            mode="lines",
            showlegend=show,
            line = dict(color = "#E09C39")),
                        row=1, col=k)
        fig.add_trace(go.Scatter(
            x=list(range(1, len(y_test1)+1)), 
            y=pred2, 
            name = "Agg2",
            mode="lines",
            showlegend=show,
            line = dict(color = "#52A23C")),
                        row=1, col=k)
        fig.add_trace(go.Scatter(
            x=list(range(1, len(y_test1)+1)), 
            y=pred3, 
            name = "Agg3",
            mode="lines",
            showlegend=show,
            line = dict(color = "#B83813")),
                        row=1, col=k)

        aggregations.plot.density(ax = axs[k-1])
        axs[k-1].set_yscale('log')
        if k == 1:
            show = False
        k += 1
    path1 = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Dataset/for_Sothea/phase1/"
    param = pd.read_csv(path1 + f"Pred_Balloon_{i+1}_Amplitude_hourly.DAT")
    param = param.drop(columns="YONSEI")
    cor_param = [np.corrcoef(av_resolution(24, param[param.columns[i]]).values.reshape(-1), y_test1)[0,1] for i in range(param.shape[1])]
    bar_data = pd.DataFrame({
        'Corr' : [coef_rf, coef_ex, coef_boost] + cor_param,
        'Method' : ['RF', 'Extra', 'Boost'] + list(param.columns),
        'Type' : ['ML', 'ML', 'ML'] + list(np.repeat('Param', param.shape[1]))
    })
    fig.update_layout(title_text="* Aggregation on balloon {}".format(i+1), width = 1400, height = 400)
    fig.show()
    plt.show()
    plt.figure(figsize=(9,5))
    ax = sns.barplot(
        bar_data,
        x = "Method",
        y = "Corr",
        hue = 'Type'
    )
    ax.set_xticklabels(bar_data.Method, rotation=45)
    ax.bar_label(ax.containers[0])
    ax.bar_label(ax.containers[1])
    plt.show()