---
title: "Aggregation of ML and Parametrization (2019)"
author: "[`Sothea HAS`](https://hassothea.github.io/)"
format:
html:
anchor-sections: true
code-tools: true
code-fold: true
toc: false
toc-depth: 3
jupyter: python3
---
# Installing and importing packages
```{python}
import sys
import netCDF4
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde as kde
import pandas as pd
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import seaborn as sns
```
# Aggregation 1:
 {width="1000px"}
with, for example, $$K_h(x)=\exp(-\|x/h\|^2).$$
# Aggregation 2:
 {width="1000px"}
with, for example, $$K_{a,b}(x)=\exp(-\|(x/a,y/b)\|^2).$$
# Aggregation 3:
 {width="1000px"}
# Report result
The following result are:
- The first graphic: time series of true gravity wave momentum fluxes and the three aggregation methods.
- The second graphic: PDF of the shown time series (logarithmic scale on the y axis).
- The barplot of correlations of the 3 ML methods and the 11 parametrizations.
```{python}
def av_resolution(resulution, y):
a = len (y) // resulution
r = len (y) % a
if r < resulution/ 2 :
group = np.concatenate((np.repeat(list (range (0 ,a)), resulution), np.repeat(a- 1 , r)))
else :
group = np.concatenate((np.repeat(list (range (0 ,a)), resulution), np.repeat(a, r)))
return y.groupby(by = group).mean()
```
```{python}
#| warning: false
#| messages: false
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# ml_coef = {
# '1' : [0.56, 0.57, 0.58],
# '2' : [0.70, 0.67, 0.74],
# '3' : [0.45, 0.48, 0.49],
# '4' : [0.44, 0.43, 0.47],
# '5' : [0.51, 0.56, 0.55],
# '6' : [0.72, 0.74, 0.75],
# '7' : [0.51, 0.53, 0.48],
# '8' : [0.74, 0.76, 0.72]
# }
index_bal_day = [0 , 107 , 210 , 311 , 377 , 457 , 515 , 598 , 675 ]
path0 = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/models/output_ml_param_both_v2/output/"
path = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Use2021_predict2019/models/output_ml_param/output/"
ml_pred = xr.open_dataset(path + 'pred_ml_2019_abs5.nc' )
rf_pred0 = av_resolution(8 , pd.DataFrame(ml_pred['rf' ].values)).values.reshape(- 1 )
extra_pred0 = av_resolution(8 , pd.DataFrame(ml_pred['extra' ].values)).values.reshape(- 1 )
boost_pred0 = av_resolution(8 , pd.DataFrame(ml_pred['boost' ].values)).values.reshape(- 1 )
fig = make_subplots(
rows= 1 , cols= 3 ,
specs= [[{}, {}, {}]],
subplot_titles= ("ML" ,"ML + Parametrization" , "Parametrization" ))
sns.set ()
first = True
for i in [0 ,1 ,2 ,3 ,4 ,5 ,6 ,7 ]:
df_ml = xr.open_dataset(path + 'pred_bal {} _u1h_abs_ml_v3.nc' .format (i+ 1 ))
df_ml_param = xr.open_dataset(path + 'pred_bal {} _u1h_abs_ml_plus_param_v3.nc' .format (i+ 1 ))
df_param = xr.open_dataset(path + 'pred_bal {} _u1h_abs_param_v3.nc' .format (i+ 1 ))
all_pred = {'ml' : df_ml, 'param' : df_param, 'ml_param' : df_ml_param}
k = 1
show = True
fig.data = []
f, axs = plt.subplots(nrows= 1 , ncols= 3 , figsize= (15 , 4 ))
for j in ['ml' , 'ml_param' , 'param' ]:
pred1 = av_resolution(8 , pd.DataFrame(all_pred[j].variables['agg1' ][:].values)).values.reshape(- 1 )
pred2 = av_resolution(8 , pd.DataFrame(all_pred[j].variables['agg2' ][:].values)).values.reshape(- 1 )
pred3 = av_resolution(8 , pd.DataFrame(all_pred[j].variables['agg3' ][:].values)).values.reshape(- 1 )
y_test1 = av_resolution(8 , pd.DataFrame(all_pred[j].variables['y_test' ][:].values)).values.reshape(- 1 )
rmae1 = np.round (np.mean(np.abs (y_test1 - pred1))/ np.average(np.abs (y_test1)), 3 )
coef1 = np.round (np.corrcoef(y_test1, pred1)[0 ,1 ], 3 )
rmae2 = np.round (np.mean(np.abs (y_test1 - pred2))/ np.average(np.abs (y_test1)), 3 )
coef2 = np.round (np.corrcoef(y_test1, pred2)[0 ,1 ], 3 )
rmae3 = np.round (np.mean(np.abs (y_test1 - pred3))/ np.average(np.abs (y_test1)), 3 )
coef3 = np.round (np.corrcoef(y_test1, pred3)[0 ,1 ], 3 )
if j == 'ml' :
pred_rf = rf_pred0[index_bal_day[i]:index_bal_day[i+ 1 ]]
pred_ex = extra_pred0[index_bal_day[i]:index_bal_day[i+ 1 ]]
pred_boost = boost_pred0[index_bal_day[i]:index_bal_day[i+ 1 ]]
coef_rf = np.round (np.corrcoef(y_test1, pred_rf)[0 ,1 ], 3 )
coef_ex = np.round (np.corrcoef(y_test1, pred_ex)[0 ,1 ], 3 )
coef_boost = np.round (np.corrcoef(y_test1, pred_boost)[0 ,1 ], 3 )
aggregations = pd.DataFrame(
{
'True GWMF' : y_test1,
'Agg 1 ( {} )' .format (coef1) : pred1,
'Agg 2 ( {} )' .format (coef2) : pred2,
'Agg 3 ( {} )' .format (coef3) : pred3,
# 'RF ({})'.format(coef_rf) : pred_rf,
# 'Extra-trees ({})'.format(coef_ex) : pred_ex,
# 'Adaboost ({})'.format(coef_boost) : pred_boost
})
fig.add_trace(go.Scatter(
x= list (range (1 , len (y_test1)+ 1 )),
y= y_test1,
name = "True" ,
mode= "lines" ,
showlegend= show,
line = dict (color = "#2474B4" )),
row= 1 , col= k)
fig.add_trace(go.Scatter(
x= list (range (1 , len (y_test1)+ 1 )),
y= pred1,
name = "Agg1" ,
mode= "lines" ,
showlegend= show,
line = dict (color = "#E09C39" )),
row= 1 , col= k)
fig.add_trace(go.Scatter(
x= list (range (1 , len (y_test1)+ 1 )),
y= pred2,
name = "Agg2" ,
mode= "lines" ,
showlegend= show,
line = dict (color = "#52A23C" )),
row= 1 , col= k)
fig.add_trace(go.Scatter(
x= list (range (1 , len (y_test1)+ 1 )),
y= pred3,
name = "Agg3" ,
mode= "lines" ,
showlegend= show,
line = dict (color = "#B83813" )),
row= 1 , col= k)
aggregations.plot.density(ax = axs[k- 1 ])
axs[k- 1 ].set_yscale('log' )
if k == 1 :
show = False
k += 1
path1 = "C:/Users/Sothea Has/Postdoc_Sothea/Codes/Second_Part/Dataset/for_Sothea/phase1/"
param = pd.read_csv(path1 + f"Pred_Balloon_ { i+ 1 } _Amplitude_hourly.DAT" )
param = param.drop(columns= "YONSEI" )
cor_param = [np.corrcoef(av_resolution(24 , param[param.columns[i]]).values.reshape(- 1 ), y_test1)[0 ,1 ] for i in range (param.shape[1 ])]
bar_data = pd.DataFrame({
'Corr' : [coef_rf, coef_ex, coef_boost] + cor_param,
'Method' : ['RF' , 'Extra' , 'Boost' ] + list (param.columns),
'Type' : ['ML' , 'ML' , 'ML' ] + list (np.repeat('Param' , param.shape[1 ]))
})
fig.update_layout(title_text= "* Aggregation on balloon {} " .format (i+ 1 ), width = 1400 , height = 400 )
fig.show()
plt.show()
plt.figure(figsize= (9 ,5 ))
ax = sns.barplot(
bar_data,
x = "Method" ,
y = "Corr" ,
hue = 'Type'
)
ax.set_xticklabels(bar_data.Method, rotation= 45 )
ax.bar_label(ax.containers[0 ])
ax.bar_label(ax.containers[1 ])
plt.show()
```