Aggregation of ML and Parametrization (2019)

Author

Sothea HAS

Installing and importing packages

Code

import sys 
import netCDF4
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde as kde
import pandas as pd
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import seaborn as sns

Aggregation 1: Gradient COBRA

with, for example, \[K_h(x)=\exp(-\|x/h\|^2).\]

One needs to estimate the best smoothing parameter $h$ for the aggregation.

Aggregation 2: MixCOBRA

with, for example, \[K_{a,b}(x)=\exp(-\|(x/a,y/b)\|^2).\]

One needs to estimate $a$ and $b$.

Aggregation 3: Super learner

Report result

The following result are:

The first graphic: time series of true gravity wave momentum fluxes and the three aggregation methods.
The second graphic: PDF of the shown time series (logarithmic scale on the y axis).
The barplot of correlations of the 3 ML methods and the 11 parametrizations.

Code

def av_resolution(resulution, y):
    a = len(y) // resulution
    r = len(y) % a
    if r < resulution/2:
        group = np.concatenate((np.repeat(list(range(0,a)), resulution), np.repeat(a-1, r)))
    else:
        group = np.concatenate((np.repeat(list(range(0,a)), resulution), np.repeat(a, r)))
    return y.groupby(by = group).mean()

Code

from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error

version = 4
typ = 'abs'
# ml_coef = {
#     '1' : [0.56, 0.57, 0.58],
#     '2' : [0.70, 0.67, 0.74],
#     '3' : [0.45, 0.48, 0.49],
#     '4' : [0.44, 0.43, 0.47],
#     '5' : [0.51, 0.56, 0.55],
#     '6' : [0.72, 0.74, 0.75],
#     '7' : [0.51, 0.53, 0.48],
#     '8' : [0.74, 0.76, 0.72]
# }
cols = {'y_test': 'black', 'Agg1' : '#4095C2', 'Agg2': '#558C5E', 'Agg3': '#D45A56'}
index_bal_day = [0, 107, 210, 311, 377, 457, 515, 598, 675]
path0 = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/models/output_ml_param_both_v2/output/"
path = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/models/output_ml_param/output/new_result_2024/result_param3/"
path1 = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/models/output_ml_param/output/"
ml_pred = xr.open_dataset(path + f'pred_ml_2019_{typ}.nc')
rf_pred0 = av_resolution(8, pd.DataFrame(ml_pred['rf'].values)).values.reshape(-1)
extra_pred0 = av_resolution(8, pd.DataFrame(ml_pred['extra'].values)).values.reshape(-1)
boost_pred0 = av_resolution(8, pd.DataFrame(ml_pred['boost'].values)).values.reshape(-1)
# subfig_title = ["ML","ML + Parametrization","ML + LMDz", "Parametrization"]
subfig_title = ["ML","ML + 3 Params","3 Params", 'Agg_Param_All_No_OldCMAM']
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{}, {}],[{}, {}]],
    horizontal_spacing = 0.075,
    vertical_spacing= 0.09,
    subplot_titles=("Among MLs","MLs with 3 Params","Among the 3 Params", "Among all params except 'Old CMAM'"))

fig_scatter = make_subplots(
    rows=2, cols=2,
    specs=[[{}, {}],[{}, {}]],
    horizontal_spacing = 0.075,
    vertical_spacing= 0.09,
    subplot_titles=("Among MLs","MLs with 3 Params","Among the 3 Params", "Among all params except 'Old CMAM'"))
sns.set()
first = True
rs = {1 : 1, 2: 1, 3: 2, 4: 2}
cs = {1 : 1, 2: 2, 3: 1, 4: 2}

all_correlation_coef = []
all_rmse = []

for i in range(8): 
    df_ml = xr.open_dataset(path + f'pred_bal{i+1}_u1h_{typ}_ML_v6.nc')
    df_ml_param = xr.open_dataset(path + f'pred_bal{i+1}_u1h_{typ}_ML_Param_3M.nc')
    df_param = xr.open_dataset(path + f'pred_bal{i+1}_u1h_{typ}_Param_No_OldCMAM_v6.nc')
    df_ml_lmdz = xr.open_dataset(path + f'pred_bal{i+1}_u1h_{typ}_Param_3M.nc') 
    all_pred = {'ml' : df_ml, 'param' : df_param, 'ml_param' : df_ml_param, 'ml_lmdz' : df_ml_lmdz}
    k = 1
    show = True
    fig.data = []
    fig_scatter.data = []
    f, axs = plt.subplots(nrows=2, ncols=2, figsize=(8, 6))
    f.tight_layout(pad=2.0)
    corr_agg = []
    rmse_agg = []
    corr_type = []
    agg_type = []
    jj = 1
    nam = {'ml': 'Agg_ML', 'ml_param': 'ML_Param_3M', 'ml_lmdz': 'Param_3M', 'param': 'Agg_Param_All_No_OldCMAM'}
    for j in ['ml', 'ml_param', 'ml_lmdz', 'param']:
        pred1 = 1000*av_resolution(8, pd.DataFrame(all_pred[j].variables['agg1'][:].values)).values.reshape(-1)
        pred2 = 1000*av_resolution(8, pd.DataFrame(all_pred[j].variables['agg2'][:].values)).values.reshape(-1)
        pred3 = 1000*av_resolution(8, pd.DataFrame(all_pred[j].variables['agg3'][:].values)).values.reshape(-1)
        # pred3 = av_resolution(8, pd.DataFrame(all_pred[j].variables['agg3'][:].values)).values.reshape(-1)
        y_test1 = 1000*av_resolution(8, pd.DataFrame(all_pred[j].variables['y_test'][:].values)).values.reshape(-1)

        rmse1 = np.round(np.sqrt(mean_squared_error(y_test1, pred1)), 5)
        coef1 = np.round(np.corrcoef(y_test1, pred1)[0,1], 2)

        rmse2 = np.round(np.sqrt(mean_squared_error(y_test1, pred2)), 5)
        coef2 = np.round(np.corrcoef(y_test1, pred2)[0,1], 2)

        rmse3 = np.round(np.sqrt(mean_squared_error(y_test1, pred3)), 5)
        coef3 = np.round(np.corrcoef(y_test1, pred3)[0,1], 2)
        corr_agg.extend([coef1, coef2, coef3])
        rmse_agg.extend([rmse1, rmse2, rmse3])
        # print(rmse_agg)
        corr_type.extend([nam[j]+'1', nam[j]+'2', nam[j]+'3'])
        agg_type.extend(['Agg1', 'Agg2', 'Agg3'])
        if j == 'ml':
            pred_rf = 1000*np.array(rf_pred0[index_bal_day[i]:index_bal_day[i+1]])
            pred_ex = 1000*np.array(extra_pred0[index_bal_day[i]:index_bal_day[i+1]])
            pred_boost = 1000*np.array(boost_pred0[index_bal_day[i]:index_bal_day[i+1]])
            coef_rf = np.round(np.corrcoef(y_test1, pred_rf)[0,1], 2)
            coef_ex = np.round(np.corrcoef(y_test1, pred_ex)[0,1], 2)
            coef_boost = np.round(np.corrcoef(y_test1, pred_boost)[0,1], 2)

            rmse_rf = np.round(np.sqrt(mean_squared_error(y_test1, pred_rf)),5)
            rmse_ex = np.round(np.sqrt(mean_squared_error(y_test1, pred_ex)),5)
            rmse_boost = np.round(np.sqrt(mean_squared_error(y_test1, pred_boost)), 5)
        aggregations = pd.DataFrame(
            {
                'True GWMF' : y_test1,
                'Agg 1 ({})'.format(coef1) : pred1,
                'Agg 2 ({})'.format(coef2) : pred2,
                'Agg 3 ({})'.format(coef3) : pred3
            })

        fig.add_trace(go.Scatter(
            x=list(range(1, len(y_test1)+1)), 
            y=y_test1, 
            name = "True",
            mode="lines",
            showlegend=show,
            line = dict(color = "#2474B4")),
                        row=rs[k], col=cs[k])
        fig.add_trace(go.Scatter(
            x=list(range(1, len(y_test1))), 
            y=pred1, 
            name = "Agg1",
            mode="lines",
            showlegend=show,
            line = dict(color = "#E09C39")),
                        row=rs[k], col=cs[k])
        fig.add_trace(go.Scatter(
            x=list(range(1, len(y_test1)+1)), 
            y=pred2, 
            name = "Agg2",
            mode="lines",
            showlegend=show,
            line = dict(color = "#52A23C")),
                        row=rs[k], col=cs[k])
        fig.add_trace(go.Scatter(
            x=list(range(1, len(y_test1)+1)), 
            y=pred3, 
            name = "Agg3",
            mode="lines",
            showlegend=show,
            line = dict(color = "#B83813")),
                        row=rs[k], col=cs[k])
        if k == 1:
            xaxis = 'xaxis'
            yaxis = 'yaxis'
        else:
            xaxis = 'xaxis' + str(k)
            yaxis = 'yaxis' + str(k)
        if k in [3,4]:
            fig['layout'][xaxis]['title'] = "Flight duration (day)"
        if k in [1,3]:
            fig['layout'][yaxis]['title'] = "GWMF (mPa)"
        # fig['layout'][yaxis].update(range=[0, np.max(np.max(pred1, pred2, pred3))])
        # Scatterplot
        fig_scatter.add_trace(go.Scatter(
            x=y_test1, 
            y=y_test1, 
            name = "True",
            mode="lines",
            opacity=0.75,
            showlegend=show,
            line = dict(color = "#2474B4")),
                        row=rs[k], col=cs[k])
        fig_scatter.add_trace(go.Scatter(
            x=y_test1, 
            y=pred1,
            name = "Agg1",
            mode="markers",
            opacity=0.75,
            showlegend=show,
            marker = dict(color = "#E09C39")),
                        row=rs[k], col=cs[k])
        fig_scatter.add_trace(go.Scatter(
            x=y_test1, 
            y=pred2, 
            name = "Agg2",
            mode="markers",
            opacity=0.75,
            showlegend=show,
            marker = dict(color = "#52A23C")),
                        row=rs[k], col=cs[k])
        fig_scatter.add_trace(go.Scatter(
            x=y_test1,
            y=pred3, 
            name = "Agg3",
            mode="markers",
            opacity=0.75,
            showlegend=show,
            marker = dict(color = "#B83813")),
                        row=rs[k], col=cs[k])
        if k in [3,4]:
            fig_scatter['layout'][xaxis]['title'] = "Reference GWMF (mPa)"
        if k in [1,3]:
            fig_scatter['layout'][yaxis]['title'] = "Prediction (mPa)"
        fig_scatter['layout'][yaxis]['range'] = [0, np.max([pred1, pred2, pred3])*1.1]
        sns.histplot(data=aggregations, ax = axs[rs[k]-1, cs[k]-1], legend=True, palette = list(cols.values()))
        # axs[rs[k]-1, cs[k]-1].set_yscale('log')
        axs[rs[k]-1, cs[k]-1].set_title(subfig_title[k-1])
        # axs[(k-1) // 2, (k-1) % 2].set_xticks(axs[(k-1) // 2, (k-1) % 2].get_xticks(), rotation = 30)
        if k == 1:
            show = False
        k += 1
    path1 = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Dataset/for_Sothea/phase1/"
    param = pd.read_csv(path1 + f"Pred_Balloon_{i+1}_Amplitude_hourly.DAT")
    param = param.drop(columns="old_CMAM")
    cor_param = [np.round(np.corrcoef(av_resolution(24, param[param.columns[i]]).values.reshape(-1), y_test1), 2)[0,1] for i in range(param.shape[1])]
    rmse_param = [np.round(np.sqrt(mean_squared_error(1000*av_resolution(24, param[param.columns[i]]).values.reshape(-1), y_test1)), 5) for i in range(param.shape[1])]
    agg_methods_ = np.repeat(['Agg_ML', 'Agg_ML_3Param', 'Agg_3Param', 'Agg_all_Param'], 3)
    bar_data = pd.DataFrame({
        'Corr' : [coef_rf, coef_ex, coef_boost] + corr_agg + cor_param ,
        'Method' : ['RF', 'Extra', 'Boost'] + corr_type + list(param.columns),
        'Type' : ['ML', 'ML', 'ML'] + agg_type + list(np.repeat('Param', param.shape[1]))
    })
    all_correlation_coef.append([coef_rf, coef_ex, coef_boost] + corr_agg + cor_param)
    fig.update_layout(title_text="Aggregations on balloon {}".format(i+1), width = 820, height = 600)
    fig.show()
    fig_scatter.update_layout(title_text="Scatterplots on {}".format(i+1), width = 820, height = 600)
    fig_scatter.show()
    plt.figure(figsize=(9,5))
    ax = sns.barplot(
        bar_data,
        x = "Method",
        y = "Corr",
        hue = 'Type'
    )
    ax.set_xticklabels(bar_data.Method, rotation=45, ha="right")
    ax.bar_label(ax.containers[0], rotation = 45)
    ax.bar_label(ax.containers[1], rotation = 45)
    ax.bar_label(ax.containers[2], rotation = 45)
    ax.bar_label(ax.containers[3], rotation = 45)
    ax.bar_label(ax.containers[4], rotation = 45)
    plt.title("Correlations of all the methods.", fontsize = 15)
    plt.show()
    rmse_data = pd.DataFrame({
        'RMSE' : np.array([rmse_rf, rmse_ex, rmse_boost] + rmse_agg + rmse_param),
        'Method' : ['RF', 'Extra', 'Boost'] + corr_type + list(param.columns),
        'Type' : ['ML', 'ML', 'ML'] + list(agg_methods_) + list(np.repeat('Param', param.shape[1]))
    })
    ax2 = sns.barplot(
        rmse_data,
        x = "Method",
        y = "RMSE",
        hue = 'Type'
    )
    ax2.set_xticklabels(rmse_data.Method, rotation=45, ha="right")
    ax2.bar_label(ax2.containers[0], rotation = 85)
    ax2.bar_label(ax2.containers[1], rotation = 85)
    ax2.bar_label(ax2.containers[2], rotation = 85)
    ax2.bar_label(ax2.containers[3], rotation = 85)
    ax2.bar_label(ax2.containers[4], rotation = 85)
    ax2.bar_label(ax2.containers[5], rotation = 85)
    ax2.set_ylabel("RMSE (mPa)", fontsize = 15)
    plt.title("RMSE (mPa) of all the methods.", fontsize = 15)
    plt.show()
    print("Balloon " + str(i+1) + "... Done!")

df = pd.DataFrame(all_correlation_coef, columns = ['RF', 'Extra', 'Boost'] + corr_type + list(param.columns))
df.to_csv("C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/Aggregation_with_3_Parameterization/Slides/all_corr_abs.csv")

df_rmse = pd.DataFrame(all_rmse, columns = ['RF', 'Extra', 'Boost'] + corr_type + list(param.columns))
df_rmse.to_csv("C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/Aggregation_with_3_Parameterization/Slides/all_rmse_abs.csv")

Balloon 1... Done!
Balloon 2... Done!
Balloon 3... Done!
Balloon 4... Done!
Balloon 5... Done!
Balloon 6... Done!
Balloon 7... Done!
Balloon 8... Done!

--- title: "Aggregation of ML and Parametrization (2019)" author: "[`Sothea HAS`](https://hassothea.github.io/)" format: html: anchor-sections: true code-tools: true code-fold: true toc: false toc-depth: 3 jupyter: python3 --- # Installing and importing packages ```{python} import sys import netCDF4 import xarray as xr import numpy as np import matplotlib.pyplot as plt from scipy.stats import gaussian_kde as kde import pandas as pd import plotly.graph_objects as go from sklearn.preprocessing import StandardScaler, MinMaxScaler import seaborn as sns ``` # Aggregation 1: Gradient COBRA ![](./fig_agg1.png){width="1000px"} with, for example, $$K_h(x)=\exp(-\|x/h\|^2).$$ One needs to estimate the best smoothing parameter $h$ for the aggregation. # Aggregation 2: MixCOBRA ![](./fig_agg2.png){width="1000px"} with, for example, $$K_{a,b}(x)=\exp(-\|(x/a,y/b)\|^2).$$ One needs to estimate $a$ and $b$. # Aggregation 3: Super learner ![](./fig_agg3.png){width="1000px"} # Report result The following result are: - The first graphic: time series of true gravity wave momentum fluxes and the three aggregation methods. - The second graphic: PDF of the shown time series (logarithmic scale on the y axis). - The barplot of correlations of the 3 ML methods and the 11 parametrizations. ```{python} def av_resolution(resulution, y): a = len(y) // resulution r = len(y) % a if r < resulution/2: group = np.concatenate((np.repeat(list(range(0,a)), resulution), np.repeat(a-1, r))) else: group = np.concatenate((np.repeat(list(range(0,a)), resulution), np.repeat(a, r))) return y.groupby(by = group).mean() ``` ```{python} #| warning: false #| messages: false from plotly.subplots import make_subplots import plotly.graph_objects as go from sklearn.metrics import mean_squared_error version = 4 typ = 'abs' # ml_coef = { # '1' : [0.56, 0.57, 0.58], # '2' : [0.70, 0.67, 0.74], # '3' : [0.45, 0.48, 0.49], # '4' : [0.44, 0.43, 0.47], # '5' : [0.51, 0.56, 0.55], # '6' : [0.72, 0.74, 0.75], # '7' : [0.51, 0.53, 0.48], # '8' : [0.74, 0.76, 0.72] # } cols = {'y_test': 'black', 'Agg1' : '#4095C2', 'Agg2': '#558C5E', 'Agg3': '#D45A56'} index_bal_day = [0, 107, 210, 311, 377, 457, 515, 598, 675] path0 = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/models/output_ml_param_both_v2/output/" path = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/models/output_ml_param/output/new_result_2024/result_param3/" path1 = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/models/output_ml_param/output/" ml_pred = xr.open_dataset(path + f'pred_ml_2019_{typ}.nc') rf_pred0 = av_resolution(8, pd.DataFrame(ml_pred['rf'].values)).values.reshape(-1) extra_pred0 = av_resolution(8, pd.DataFrame(ml_pred['extra'].values)).values.reshape(-1) boost_pred0 = av_resolution(8, pd.DataFrame(ml_pred['boost'].values)).values.reshape(-1) # subfig_title = ["ML","ML + Parametrization","ML + LMDz", "Parametrization"] subfig_title = ["ML","ML + 3 Params","3 Params", 'Agg_Param_All_No_OldCMAM'] fig = make_subplots( rows=2, cols=2, specs=[[{}, {}],[{}, {}]], horizontal_spacing = 0.075, vertical_spacing= 0.09, subplot_titles=("Among MLs","MLs with 3 Params","Among the 3 Params", "Among all params except 'Old CMAM'")) fig_scatter = make_subplots( rows=2, cols=2, specs=[[{}, {}],[{}, {}]], horizontal_spacing = 0.075, vertical_spacing= 0.09, subplot_titles=("Among MLs","MLs with 3 Params","Among the 3 Params", "Among all params except 'Old CMAM'")) sns.set() first = True rs = {1 : 1, 2: 1, 3: 2, 4: 2} cs = {1 : 1, 2: 2, 3: 1, 4: 2} all_correlation_coef = [] all_rmse = [] for i in range(8): df_ml = xr.open_dataset(path + f'pred_bal{i+1}_u1h_{typ}_ML_v6.nc') df_ml_param = xr.open_dataset(path + f'pred_bal{i+1}_u1h_{typ}_ML_Param_3M.nc') df_param = xr.open_dataset(path + f'pred_bal{i+1}_u1h_{typ}_Param_No_OldCMAM_v6.nc') df_ml_lmdz = xr.open_dataset(path + f'pred_bal{i+1}_u1h_{typ}_Param_3M.nc') all_pred = {'ml' : df_ml, 'param' : df_param, 'ml_param' : df_ml_param, 'ml_lmdz' : df_ml_lmdz} k = 1 show = True fig.data = [] fig_scatter.data = [] f, axs = plt.subplots(nrows=2, ncols=2, figsize=(8, 6)) f.tight_layout(pad=2.0) corr_agg = [] rmse_agg = [] corr_type = [] agg_type = [] jj = 1 nam = {'ml': 'Agg_ML', 'ml_param': 'ML_Param_3M', 'ml_lmdz': 'Param_3M', 'param': 'Agg_Param_All_No_OldCMAM'} for j in ['ml', 'ml_param', 'ml_lmdz', 'param']: pred1 = 1000*av_resolution(8, pd.DataFrame(all_pred[j].variables['agg1'][:].values)).values.reshape(-1) pred2 = 1000*av_resolution(8, pd.DataFrame(all_pred[j].variables['agg2'][:].values)).values.reshape(-1) pred3 = 1000*av_resolution(8, pd.DataFrame(all_pred[j].variables['agg3'][:].values)).values.reshape(-1) # pred3 = av_resolution(8, pd.DataFrame(all_pred[j].variables['agg3'][:].values)).values.reshape(-1) y_test1 = 1000*av_resolution(8, pd.DataFrame(all_pred[j].variables['y_test'][:].values)).values.reshape(-1) rmse1 = np.round(np.sqrt(mean_squared_error(y_test1, pred1)), 5) coef1 = np.round(np.corrcoef(y_test1, pred1)[0,1], 2) rmse2 = np.round(np.sqrt(mean_squared_error(y_test1, pred2)), 5) coef2 = np.round(np.corrcoef(y_test1, pred2)[0,1], 2) rmse3 = np.round(np.sqrt(mean_squared_error(y_test1, pred3)), 5) coef3 = np.round(np.corrcoef(y_test1, pred3)[0,1], 2) corr_agg.extend([coef1, coef2, coef3]) rmse_agg.extend([rmse1, rmse2, rmse3]) # print(rmse_agg) corr_type.extend([nam[j]+'1', nam[j]+'2', nam[j]+'3']) agg_type.extend(['Agg1', 'Agg2', 'Agg3']) if j == 'ml': pred_rf = 1000*np.array(rf_pred0[index_bal_day[i]:index_bal_day[i+1]]) pred_ex = 1000*np.array(extra_pred0[index_bal_day[i]:index_bal_day[i+1]]) pred_boost = 1000*np.array(boost_pred0[index_bal_day[i]:index_bal_day[i+1]]) coef_rf = np.round(np.corrcoef(y_test1, pred_rf)[0,1], 2) coef_ex = np.round(np.corrcoef(y_test1, pred_ex)[0,1], 2) coef_boost = np.round(np.corrcoef(y_test1, pred_boost)[0,1], 2) rmse_rf = np.round(np.sqrt(mean_squared_error(y_test1, pred_rf)),5) rmse_ex = np.round(np.sqrt(mean_squared_error(y_test1, pred_ex)),5) rmse_boost = np.round(np.sqrt(mean_squared_error(y_test1, pred_boost)), 5) aggregations = pd.DataFrame( { 'True GWMF' : y_test1, 'Agg 1 ({})'.format(coef1) : pred1, 'Agg 2 ({})'.format(coef2) : pred2, 'Agg 3 ({})'.format(coef3) : pred3 }) fig.add_trace(go.Scatter( x=list(range(1, len(y_test1)+1)), y=y_test1, name = "True", mode="lines", showlegend=show, line = dict(color = "#2474B4")), row=rs[k], col=cs[k]) fig.add_trace(go.Scatter( x=list(range(1, len(y_test1))), y=pred1, name = "Agg1", mode="lines", showlegend=show, line = dict(color = "#E09C39")), row=rs[k], col=cs[k]) fig.add_trace(go.Scatter( x=list(range(1, len(y_test1)+1)), y=pred2, name = "Agg2", mode="lines", showlegend=show, line = dict(color = "#52A23C")), row=rs[k], col=cs[k]) fig.add_trace(go.Scatter( x=list(range(1, len(y_test1)+1)), y=pred3, name = "Agg3", mode="lines", showlegend=show, line = dict(color = "#B83813")), row=rs[k], col=cs[k]) if k == 1: xaxis = 'xaxis' yaxis = 'yaxis' else: xaxis = 'xaxis' + str(k) yaxis = 'yaxis' + str(k) if k in [3,4]: fig['layout'][xaxis]['title'] = "Flight duration (day)" if k in [1,3]: fig['layout'][yaxis]['title'] = "GWMF (mPa)" # fig['layout'][yaxis].update(range=[0, np.max(np.max(pred1, pred2, pred3))]) # Scatterplot fig_scatter.add_trace(go.Scatter( x=y_test1, y=y_test1, name = "True", mode="lines", opacity=0.75, showlegend=show, line = dict(color = "#2474B4")), row=rs[k], col=cs[k]) fig_scatter.add_trace(go.Scatter( x=y_test1, y=pred1, name = "Agg1", mode="markers", opacity=0.75, showlegend=show, marker = dict(color = "#E09C39")), row=rs[k], col=cs[k]) fig_scatter.add_trace(go.Scatter( x=y_test1, y=pred2, name = "Agg2", mode="markers", opacity=0.75, showlegend=show, marker = dict(color = "#52A23C")), row=rs[k], col=cs[k]) fig_scatter.add_trace(go.Scatter( x=y_test1, y=pred3, name = "Agg3", mode="markers", opacity=0.75, showlegend=show, marker = dict(color = "#B83813")), row=rs[k], col=cs[k]) if k in [3,4]: fig_scatter['layout'][xaxis]['title'] = "Reference GWMF (mPa)" if k in [1,3]: fig_scatter['layout'][yaxis]['title'] = "Prediction (mPa)" fig_scatter['layout'][yaxis]['range'] = [0, np.max([pred1, pred2, pred3])*1.1] sns.histplot(data=aggregations, ax = axs[rs[k]-1, cs[k]-1], legend=True, palette = list(cols.values())) # axs[rs[k]-1, cs[k]-1].set_yscale('log') axs[rs[k]-1, cs[k]-1].set_title(subfig_title[k-1]) # axs[(k-1) // 2, (k-1) % 2].set_xticks(axs[(k-1) // 2, (k-1) % 2].get_xticks(), rotation = 30) if k == 1: show = False k += 1 path1 = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Dataset/for_Sothea/phase1/" param = pd.read_csv(path1 + f"Pred_Balloon_{i+1}_Amplitude_hourly.DAT") param = param.drop(columns="old_CMAM") cor_param = [np.round(np.corrcoef(av_resolution(24, param[param.columns[i]]).values.reshape(-1), y_test1), 2)[0,1] for i in range(param.shape[1])] rmse_param = [np.round(np.sqrt(mean_squared_error(1000*av_resolution(24, param[param.columns[i]]).values.reshape(-1), y_test1)), 5) for i in range(param.shape[1])] agg_methods_ = np.repeat(['Agg_ML', 'Agg_ML_3Param', 'Agg_3Param', 'Agg_all_Param'], 3) bar_data = pd.DataFrame({ 'Corr' : [coef_rf, coef_ex, coef_boost] + corr_agg + cor_param , 'Method' : ['RF', 'Extra', 'Boost'] + corr_type + list(param.columns), 'Type' : ['ML', 'ML', 'ML'] + agg_type + list(np.repeat('Param', param.shape[1])) }) all_correlation_coef.append([coef_rf, coef_ex, coef_boost] + corr_agg + cor_param) fig.update_layout(title_text="Aggregations on balloon {}".format(i+1), width = 820, height = 600) fig.show() fig_scatter.update_layout(title_text="Scatterplots on {}".format(i+1), width = 820, height = 600) fig_scatter.show() plt.figure(figsize=(9,5)) ax = sns.barplot( bar_data, x = "Method", y = "Corr", hue = 'Type' ) ax.set_xticklabels(bar_data.Method, rotation=45, ha="right") ax.bar_label(ax.containers[0], rotation = 45) ax.bar_label(ax.containers[1], rotation = 45) ax.bar_label(ax.containers[2], rotation = 45) ax.bar_label(ax.containers[3], rotation = 45) ax.bar_label(ax.containers[4], rotation = 45) plt.title("Correlations of all the methods.", fontsize = 15) plt.show() rmse_data = pd.DataFrame({ 'RMSE' : np.array([rmse_rf, rmse_ex, rmse_boost] + rmse_agg + rmse_param), 'Method' : ['RF', 'Extra', 'Boost'] + corr_type + list(param.columns), 'Type' : ['ML', 'ML', 'ML'] + list(agg_methods_) + list(np.repeat('Param', param.shape[1])) }) ax2 = sns.barplot( rmse_data, x = "Method", y = "RMSE", hue = 'Type' ) ax2.set_xticklabels(rmse_data.Method, rotation=45, ha="right") ax2.bar_label(ax2.containers[0], rotation = 85) ax2.bar_label(ax2.containers[1], rotation = 85) ax2.bar_label(ax2.containers[2], rotation = 85) ax2.bar_label(ax2.containers[3], rotation = 85) ax2.bar_label(ax2.containers[4], rotation = 85) ax2.bar_label(ax2.containers[5], rotation = 85) ax2.set_ylabel("RMSE (mPa)", fontsize = 15) plt.title("RMSE (mPa) of all the methods.", fontsize = 15) plt.show() print("Balloon " + str(i+1) + "... Done!") df = pd.DataFrame(all_correlation_coef, columns = ['RF', 'Extra', 'Boost'] + corr_type + list(param.columns)) df.to_csv("C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/Aggregation_with_3_Parameterization/Slides/all_corr_abs.csv") df_rmse = pd.DataFrame(all_rmse, columns = ['RF', 'Extra', 'Boost'] + corr_type + list(param.columns)) df_rmse.to_csv("C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/Aggregation_with_3_Parameterization/Slides/all_rmse_abs.csv") ```