In [1]:
import IPython.core.display as di
# This line will hide code by default when the notebook is exported as HTML
di.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)
# This line will add a button to toggle visibility of code blocks, for use with the HTML export version
di.display_html('''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Show/Hide code</button>''', raw=True)
In [2]:
#Allow the created content to be interactivelly ploted inline
%matplotlib inline
#Establish width and height for all plots in the report
#pylab.rcParams['figure.figsize'] = (18, 6) #width, height
In [3]:
#Import needed libraries
import os
from os.path import join, getsize
import pandas as pd
from cycler import cycler
import matplotlib.pyplot as plt
from IPython.display import display
import numpy as np
import collections
import matplotlib as mpl
inline_rc = dict(mpl.rcParams)
#the next cell enables plotting tables without borders

# To hide warnings
import warnings
warnings.filterwarnings('ignore')
In [4]:
%%html
<style>
table,td,tr,th {border:none!important}
</style>

Summary report of the CO2MPAS WLTP to NEDC CO$_2$ emission simulation model - Manual vehicles (v.2.0.x)

In [5]:
#Specify the output folder and file containing the CO2MPAS summary output file.
folder = r'C:\Users\komnodi\Desktop\PYTHON\batch_files_assessment\01.ORIGINAL_IPNBS_UNLEASH'
file = '20180904_181534-summary_MT_UNLEASH.xlsx'
infile = join(folder, file)
df=pd.read_excel(infile, 'anon', header=[0, 1, 2, 3], index_col=[0], skiprows=[4])
In [6]:
#Gather and name the basic variables used in the report according to their name in the CO2MPAS output file
NEDC = df['nedc_h']['prediction']['output']['declared_value']
NEDCl = df['nedc_l']['prediction']['output']['declared_value']
NEDCt = df['nedc_h']['prediction']['target']['value']
UDC = df['nedc_h']['prediction']['output']['UDC']
UDCl = df['nedc_l']['prediction']['output']['UDC']
UDCt = df['nedc_h']['prediction']['target']['UDC']
EUDC = df['nedc_h']['prediction']['output']['EUDC']
EUDCl = df['nedc_l']['prediction']['output']['EUDC']
EUDCt = df['nedc_h']['prediction']['target']['EUDC']
#Obtain the case number and vehicle model from the input file
df['vehicle'] = df.index
cases = df['vehicle'].str.split('_').str[-1].astype('int')
model = df['vehicle'].str.split('_').str[0]
#Create a dataframe with this data
valuesDF = pd.DataFrame({'NEDC': NEDC,'NEDCt':NEDCt, 'dNEDC':NEDC-NEDCt,'dNEDCl':NEDCl-NEDCt,'UDC': UDC,'UDCt':UDCt, 'dUDC':UDC-UDCt,'dUDCl':UDCl-UDCt,'EUDC': EUDC,'EUDCt':EUDCt, 'dEUDC':EUDC-EUDCt,'dEUDCl':EUDCl-EUDCt,'Case':cases,'Model':model})   
#calculate percentages
valuesDF['NEDC-H error [%]'] = pd.Series((valuesDF.dNEDC/valuesDF.NEDCt*100), index=valuesDF.index)
valuesDF['UDC-H error [%]'] = pd.Series((valuesDF.dUDC/valuesDF.UDCt*100), index=valuesDF.index)
valuesDF['EUDC-H error [%]'] = pd.Series((valuesDF.dEUDC/valuesDF.EUDCt*100), index=valuesDF.index)
valuesDF['NEDC-L error [%]'] = pd.Series((valuesDF.dNEDCl/valuesDF.NEDCt*100), index=valuesDF.index)
valuesDF['UDC-L error [%]'] = pd.Series((valuesDF.dUDCl/valuesDF.UDCt*100), index=valuesDF.index)
valuesDF['EUDC-L error [%]'] = pd.Series((valuesDF.dEUDCl/valuesDF.EUDCt*100), index=valuesDF.index)
valuesDF = valuesDF.dropna()
In [7]:
valuesDF['FuelType'] = np.where((valuesDF['Model'] == 'Vehicle02') | (valuesDF['Model'] == 'Vehicle04') | (valuesDF['Model'] == 'Vehicle05') | (valuesDF['Model'] == 'Vehicle06') | (valuesDF['Model'] == 'Vehicle09') | (valuesDF['Model'] == 'Vehicle10') | (valuesDF['Model'] == 'Vehicle11'), 'Gasoline','Diesel')
In [8]:
Gasolines = valuesDF.loc[lambda valuesDF: valuesDF['FuelType'] =='Gasoline', :]
Diesels = valuesDF.loc[lambda valuesDF: valuesDF['FuelType'] =='Diesel', :]

Section 1. Performance of the model. All vehicles and test cases.

Error statistics for CO$_2$ emission per driving cycle

Error statistics for NEDC, UDC, and EUDC CO$_2$ emission

In [9]:
#Create a dataframe with the NECD, UDC, EUDC error statistics
errorsDF = pd.DataFrame(index=['Averages','StdError','Median','Mode','StdDev','Variance','Kurtosis','Skweness','Range','Minimum','Maximum','Sum','Count','Confidence level (95%)'], columns=['NEDC-H [gCO$_2$ km$^{-1}$]','UDC-H [gCO$_2$ km$^{-1}$]', 'EUDC-H [gCO$_2$ km$^{-1}$]', 'NEDC-L [gCO$_2$ km$^{-1}$]','UDC-L [gCO$_2$ km$^{-1}$]', 'EUDC-L [gCO$_2$ km$^{-1}$]'])
errorsDF.loc['Averages'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.mean(),2), 'UDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.mean(),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.mean(),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDCl.mean(),2), 'UDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDCl.mean(),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDCl.mean(),2)})
errorsDF.loc['StdError'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.sem(),2), 'UDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.sem(),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.sem(),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDCl.sem(),2), 'UDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDCl.sem(),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDCl.sem(),2)})
errorsDF.loc['Median'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.median(),2), 'UDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.median(),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.median(),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDCl.median(),2), 'UDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDCl.median(),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDCl.median(),2)})
errorsDF.loc['Mode'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.mode().iloc[0],2), 'UDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.mode().iloc[0],2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.mode().iloc[0],2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDCl.mode().iloc[0],2), 'UDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDCl.mode().iloc[0],2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDCl.mode().iloc[0],2)})
errorsDF.loc['StdDev'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.std(),2), 'UDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.std(),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.std(),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDCl.std(),2), 'UDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDCl.std(),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDCl.std(),2)})
errorsDF.loc['Variance'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.var(),2), 'UDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.var(),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.var(),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDCl.var(),2), 'UDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDCl.var(),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDCl.var(),2)})
errorsDF.loc['Kurtosis'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.kurtosis(),2), 'UDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.kurtosis(),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.kurtosis(),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDCl.kurtosis(),2), 'UDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDCl.kurtosis(),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDCl.kurtosis(),2)})
errorsDF.loc['Skweness'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.skew(),2), 'UDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.skew(),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.skew(),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDCl.skew(),2), 'UDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDCl.skew(),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDCl.skew(),2)})
errorsDF.loc['Range'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round((valuesDF.dNEDC.max()-valuesDF.dNEDC.min()),2), 'UDC-H [gCO$_2$ km$^{-1}$]':round((valuesDF.dUDC.max()-valuesDF.dUDC.min()),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round((valuesDF.dEUDC.max()-valuesDF.dEUDC.min()),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round((valuesDF.dNEDCl.max()-valuesDF.dNEDCl.min()),2), 'UDC-L [gCO$_2$ km$^{-1}$]':round((valuesDF.dUDCl.max()-valuesDF.dUDCl.min()),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round((valuesDF.dEUDCl.max()-valuesDF.dEUDCl.min()),2)})
errorsDF.loc['Minimum'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.min(),2), 'UDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.min(),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.min(),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDCl.min(),2), 'UDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDCl.min(),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDCl.min(),2)})
errorsDF.loc['Maximum'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.max(),2), 'UDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.max(),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.max(),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDCl.max(),2), 'UDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDCl.max(),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDCl.max(),2)})
errorsDF.loc['Sum'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.sum(),2), 'UDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.sum(),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.sum(),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDCl.sum(),2), 'UDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDCl.sum(),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDCl.sum(),2)})
errorsDF.loc['Count'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.count(),2), 'UDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.count(),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.count(),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDCl.count(),2), 'UDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDCl.count(),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDCl.count(),2)})
errorsDF.loc['Confidence level (95%)'] = pd.Series({'NEDC-H [gCO$_2$ km$^{-1}$]':2*round(valuesDF.dNEDC.sem(),2), 'UDC-H [gCO$_2$ km$^{-1}$]':2*round(valuesDF.dUDC.sem(),2), 'EUDC-H [gCO$_2$ km$^{-1}$]':2*round(valuesDF.dEUDC.sem(),2), 'NEDC-L [gCO$_2$ km$^{-1}$]':2*round(valuesDF.dNEDCl.sem(),2), 'UDC-L [gCO$_2$ km$^{-1}$]':2*round(valuesDF.dUDCl.sem(),2), 'EUDC-L [gCO$_2$ km$^{-1}$]':2*round(valuesDF.dEUDCl.sem(),2)})
errorsDF
Out[9]:
NEDC-H [gCO$_2$ km$^{-1}$] UDC-H [gCO$_2$ km$^{-1}$] EUDC-H [gCO$_2$ km$^{-1}$] NEDC-L [gCO$_2$ km$^{-1}$] UDC-L [gCO$_2$ km$^{-1}$] EUDC-L [gCO$_2$ km$^{-1}$]
Averages -0.52 -0.71 -0.42 0.34 0.47 0.24
StdError 0.07 0.12 0.06 0.06 0.12 0.04
Median -0.54 -1.11 -0.19 -0.01 -0.15 0.15
Mode -8.66 -7.49 -11.51 -1.13 -4.03 -3.89
StdDev 3.55 5.99 3.17 2.81 5.95 1.96
Variance 12.63 35.92 10.07 7.87 35.45 3.85
Kurtosis 0.64 -0.12 3.82 -0.09 -0.17 2.23
Skweness -0.14 0.23 -1.68 0.36 0.41 -0.87
Range 25.47 37.94 19.44 19.93 39.31 13.27
Minimum -16.17 -20.18 -13.82 -11.41 -18.96 -7.73
Maximum 9.31 17.76 5.62 8.52 20.35 5.54
Sum -1293.82 -1770.89 -1046.67 840.7 1180.41 610.56
Count 2496 2496 2496 2496 2496 2496
Confidence level (95%) 0.14 0.24 0.12 0.12 0.24 0.08

Error statistics for NEDC, UDC, and EUDC CO$_2$ emission [%]

In [10]:
#Create a dataframe with the NECD, UDC, EUDC error statistics
errorsDF = pd.DataFrame(index=['Averages','StdDev','Minimum','Maximum','Cases in ±4%','Cases in ±2.5%'], columns=['NEDC-H [%]','UDC-H [%]', 'EUDC-H [%]','NEDC-L [%]','UDC-L [%]', 'EUDC-L [%]'])
errorsDF.loc['Averages'] = pd.Series({'NEDC-H [%]':round(valuesDF['NEDC-H error [%]'].mean(),2), 'UDC-H [%]':round(valuesDF['UDC-H error [%]'].mean(),2), 'EUDC-H [%]':round(valuesDF['EUDC-H error [%]'].mean(),2),'NEDC-L [%]':round(valuesDF['NEDC-L error [%]'].mean(),2), 'UDC-L [%]':round(valuesDF['UDC-L error [%]'].mean(),2), 'EUDC-L [%]':round(valuesDF['EUDC-L error [%]'].mean(),2)})
errorsDF.loc['StdDev'] = pd.Series({'NEDC-H [%]':round(valuesDF['NEDC-H error [%]'].std(),2), 'UDC-H [%]':round(valuesDF['UDC-H error [%]'].std(),2), 'EUDC-H [%]':round(valuesDF['EUDC-H error [%]'].std(),2),'NEDC-L [%]':round(valuesDF['NEDC-L error [%]'].std(),2), 'UDC-L [%]':round(valuesDF['UDC-L error [%]'].std(),2), 'EUDC-L [%]':round(valuesDF['EUDC-L error [%]'].std(),2)})
errorsDF.loc['Minimum'] = pd.Series({'NEDC-H [%]':round(valuesDF['NEDC-H error [%]'].min(),2), 'UDC-H [%]':round(valuesDF['UDC-H error [%]'].min(),2), 'EUDC-H [%]':round(valuesDF['EUDC-H error [%]'].min(),2),'NEDC-L [%]':round(valuesDF['NEDC-L error [%]'].min(),2), 'UDC-L [%]':round(valuesDF['UDC-L error [%]'].min(),2), 'EUDC-L [%]':round(valuesDF['EUDC-L error [%]'].min(),2)})
errorsDF.loc['Maximum'] = pd.Series({'NEDC-H [%]':round(valuesDF['NEDC-H error [%]'].max(),2), 'UDC-H [%]':round(valuesDF['UDC-H error [%]'].max(),2), 'EUDC-H [%]':round(valuesDF['EUDC-H error [%]'].max(),2),'NEDC-L [%]':round(valuesDF['NEDC-L error [%]'].max(),2), 'UDC-L [%]':round(valuesDF['UDC-L error [%]'].max(),2), 'EUDC-L [%]':round(valuesDF['EUDC-L error [%]'].max(),2)})
errorsDF.loc['Cases in ±4%'] = pd.Series({'NEDC-H [%]':round(sum(abs(valuesDF['NEDC-H error [%]'])<4)/len(valuesDF['NEDC-H error [%]'])*100,1), 'UDC-H [%]':round(sum(abs(valuesDF['UDC-H error [%]'])<4)/len(valuesDF['UDC-H error [%]'])*100,1), 'EUDC-H [%]':round(sum(abs(valuesDF['EUDC-H error [%]'])<4)/len(valuesDF['EUDC-H error [%]'])*100,1),'NEDC-L [%]':round(sum(abs(valuesDF['NEDC-L error [%]'])<4)/len(valuesDF['NEDC-L error [%]'])*100,1), 'UDC-L [%]':round(sum(abs(valuesDF['UDC-L error [%]'])<4)/len(valuesDF['UDC-L error [%]'])*100,1), 'EUDC-L [%]':round(sum(abs(valuesDF['EUDC-L error [%]'])<4)/len(valuesDF['EUDC-L error [%]'])*100,1)})
errorsDF.loc['Cases in ±2.5%'] = pd.Series({'NEDC-H [%]':round(sum(abs(valuesDF['NEDC-H error [%]'])<2.5)/len(valuesDF['NEDC-H error [%]'])*100,1), 'UDC-H [%]':round(sum(abs(valuesDF['UDC-H error [%]'])<2.5)/len(valuesDF['UDC-H error [%]'])*100,1), 'EUDC-H [%]':round(sum(abs(valuesDF['EUDC-H error [%]'])<2.5)/len(valuesDF['EUDC-H error [%]'])*100,1),'NEDC-L [%]':round(sum(abs(valuesDF['NEDC-L error [%]'])<2.5)/len(valuesDF['NEDC-L error [%]'])*100,1), 'UDC-L [%]':round(sum(abs(valuesDF['UDC-L error [%]'])<2.5)/len(valuesDF['UDC-L error [%]'])*100,1), 'EUDC-L [%]':round(sum(abs(valuesDF['EUDC-L error [%]'])<2.5)/len(valuesDF['EUDC-L error [%]'])*100,1)})
errorsDF.loc['P75-P25'] = pd.Series({'NEDC-H [%]':round(valuesDF['NEDC-H error [%]'].quantile(0.75)-valuesDF['NEDC-H error [%]'].quantile(0.25),2), 'UDC-H [%]':round(valuesDF['UDC-H error [%]'].quantile(0.75)-valuesDF['UDC-H error [%]'].quantile(0.25),2), 'EUDC-H [%]':round(valuesDF['EUDC-H error [%]'].quantile(0.75)-valuesDF['EUDC-H error [%]'].quantile(0.25),2),'NEDC-L [%]':round(valuesDF['NEDC-L error [%]'].quantile(0.75)-valuesDF['NEDC-L error [%]'].quantile(0.25),2), 'UDC-L [%]':round(valuesDF['UDC-L error [%]'].quantile(0.75)-valuesDF['UDC-L error [%]'].quantile(0.25),2), 'EUDC-L [%]':round(valuesDF['EUDC-L error [%]'].quantile(0.75)-valuesDF['EUDC-L error [%]'].quantile(0.25),2)})
errorsDF.loc['Count'] = pd.Series({'NEDC-H [%]':round(valuesDF['NEDC-H error [%]'].count(),2), 'UDC-H [%]':round(valuesDF['UDC-H error [%]'].count(),2), 'EUDC-H [%]':round(valuesDF['EUDC-H error [%]'].count(),2),'NEDC-L [%]':round(valuesDF['NEDC-L error [%]'].count(),2), 'UDC-L [%]':round(valuesDF['UDC-L error [%]'].count(),2), 'EUDC-L [%]':round(valuesDF['EUDC-L error [%]'].count(),2)})
errorsDF.columns.name='ALL'
errorsDF
Out[10]:
ALL NEDC-H [%] UDC-H [%] EUDC-H [%] NEDC-L [%] UDC-L [%] EUDC-L [%]
Averages -0.27 -0.48 -0.06 0.23 0.1 0.37
StdDev 2.5 3.7 2.05 2.08 3.53 1.41
Minimum -7.32 -8.82 -6.84 -4.97 -7.94 -3.68
Maximum 7.14 11.71 5.67 7.23 12.08 4.89
Cases in ±4% 87.4 69.8 92.4 93.5 73.6 99.4
Cases in ±2.5% 62.9 52 79.3 76.4 51.7 84.4
P75-P25 2.95 4.67 2.09 2.62 4.77 1.5
Count 2496 2496 2496 2496 2496 2496
In [20]:
#GASOLINES dataframe with the NECD, UDC, EUDC error statistics
GerrorsDF = pd.DataFrame(index=['Averages','StdDev','Minimum','Maximum','Cases in ±4%','Cases in ±2.5%'], columns=['NEDC-H [%]','UDC-H [%]', 'EUDC-H [%]','NEDC-L [%]','UDC-L [%]', 'EUDC-L [%]'])
GerrorsDF.loc['Averages'] = pd.Series({'NEDC-H [%]':round(Gasolines['NEDC-H error [%]'].mean(),2), 'UDC-H [%]':round(Gasolines['UDC-H error [%]'].mean(),2), 'EUDC-H [%]':round(Gasolines['EUDC-H error [%]'].mean(),2),'NEDC-L [%]':round(Gasolines['NEDC-L error [%]'].mean(),2), 'UDC-L [%]':round(Gasolines['UDC-L error [%]'].mean(),2), 'EUDC-L [%]':round(Gasolines['EUDC-L error [%]'].mean(),2)})
GerrorsDF.loc['StdDev'] = pd.Series({'NEDC-H [%]':round(Gasolines['NEDC-H error [%]'].std(),2), 'UDC-H [%]':round(Gasolines['UDC-H error [%]'].std(),2), 'EUDC-H [%]':round(Gasolines['EUDC-H error [%]'].std(),2),'NEDC-L [%]':round(Gasolines['NEDC-L error [%]'].std(),2), 'UDC-L [%]':round(Gasolines['UDC-L error [%]'].std(),2), 'EUDC-L [%]':round(Gasolines['EUDC-L error [%]'].std(),2)})
GerrorsDF.loc['Minimum'] = pd.Series({'NEDC-H [%]':round(Gasolines['NEDC-H error [%]'].min(),2), 'UDC-H [%]':round(Gasolines['UDC-H error [%]'].min(),2), 'EUDC-H [%]':round(Gasolines['EUDC-H error [%]'].min(),2),'NEDC-L [%]':round(Gasolines['NEDC-L error [%]'].min(),2), 'UDC-L [%]':round(Gasolines['UDC-L error [%]'].min(),2), 'EUDC-L [%]':round(Gasolines['EUDC-L error [%]'].min(),2)})
GerrorsDF.loc['Maximum'] = pd.Series({'NEDC-H [%]':round(Gasolines['NEDC-H error [%]'].max(),2), 'UDC-H [%]':round(Gasolines['UDC-H error [%]'].max(),2), 'EUDC-H [%]':round(Gasolines['EUDC-H error [%]'].max(),2),'NEDC-L [%]':round(Gasolines['NEDC-L error [%]'].max(),2), 'UDC-L [%]':round(Gasolines['UDC-L error [%]'].max(),2), 'EUDC-L [%]':round(Gasolines['EUDC-L error [%]'].max(),2)})
GerrorsDF.loc['Cases in ±4%'] = pd.Series({'NEDC-H [%]':round(sum(abs(Gasolines['NEDC-H error [%]'])<4)/len(Gasolines['NEDC-H error [%]'])*100,1), 'UDC-H [%]':round(sum(abs(Gasolines['UDC-H error [%]'])<4)/len(Gasolines['UDC-H error [%]'])*100,1), 'EUDC-H [%]':round(sum(abs(Gasolines['EUDC-H error [%]'])<4)/len(Gasolines['EUDC-H error [%]'])*100,1),'NEDC-L [%]':round(sum(abs(Gasolines['NEDC-L error [%]'])<4)/len(Gasolines['NEDC-L error [%]'])*100,1), 'UDC-L [%]':round(sum(abs(Gasolines['UDC-L error [%]'])<4)/len(Gasolines['UDC-L error [%]'])*100,1), 'EUDC-L [%]':round(sum(abs(Gasolines['EUDC-L error [%]'])<4)/len(Gasolines['EUDC-L error [%]'])*100,1)})
GerrorsDF.loc['Cases in ±2.5%'] = pd.Series({'NEDC-H [%]':round(sum(abs(Gasolines['NEDC-H error [%]'])<2.5)/len(Gasolines['NEDC-H error [%]'])*100,1), 'UDC-H [%]':round(sum(abs(Gasolines['UDC-H error [%]'])<2.5)/len(Gasolines['UDC-H error [%]'])*100,1), 'EUDC-H [%]':round(sum(abs(Gasolines['EUDC-H error [%]'])<2.5)/len(Gasolines['EUDC-H error [%]'])*100,1),'NEDC-L [%]':round(sum(abs(Gasolines['NEDC-L error [%]'])<2.5)/len(Gasolines['NEDC-L error [%]'])*100,1), 'UDC-L [%]':round(sum(abs(Gasolines['UDC-L error [%]'])<2.5)/len(Gasolines['UDC-L error [%]'])*100,1), 'EUDC-L [%]':round(sum(abs(Gasolines['EUDC-L error [%]'])<2.5)/len(Gasolines['EUDC-L error [%]'])*100,1)})
GerrorsDF.loc['P75-P25'] = pd.Series({'NEDC-H [%]':round(Gasolines['NEDC-H error [%]'].quantile(0.75)-Gasolines['NEDC-H error [%]'].quantile(0.25),2), 'UDC-H [%]':round(Gasolines['UDC-H error [%]'].quantile(0.75)-Gasolines['UDC-H error [%]'].quantile(0.25),2), 'EUDC-H [%]':round(Gasolines['EUDC-H error [%]'].quantile(0.75)-Gasolines['EUDC-H error [%]'].quantile(0.25),2),'NEDC-L [%]':round(Gasolines['NEDC-L error [%]'].quantile(0.75)-Gasolines['NEDC-L error [%]'].quantile(0.25),2), 'UDC-L [%]':round(Gasolines['UDC-L error [%]'].quantile(0.75)-Gasolines['UDC-L error [%]'].quantile(0.25),2), 'EUDC-L [%]':round(Gasolines['EUDC-L error [%]'].quantile(0.75)-Gasolines['EUDC-L error [%]'].quantile(0.25),2)})
GerrorsDF.loc['Count'] = pd.Series({'NEDC-H [%]':round(Gasolines['NEDC-H error [%]'].count(),2), 'UDC-H [%]':round(Gasolines['UDC-H error [%]'].count(),2), 'EUDC-H [%]':round(Gasolines['EUDC-H error [%]'].count(),2),'NEDC-L [%]':round(Gasolines['NEDC-L error [%]'].count(),2), 'UDC-L [%]':round(Gasolines['UDC-L error [%]'].count(),2), 'EUDC-L [%]':round(Gasolines['EUDC-L error [%]'].count(),2)})
GerrorsDF.columns.name='GASOLINES'
GerrorsDF
Out[20]:
GASOLINES NEDC-H [%] UDC-H [%] EUDC-H [%] NEDC-L [%] UDC-L [%] EUDC-L [%]
Averages -0.7 -1.42 -0.08 -0.28 -1.03 0.37
StdDev 2.23 3.61 1.32 1.84 3.2 0.99
Minimum -5.09 -8.82 -2.83 -4.11 -7.94 -1.72
Maximum 6.66 10.77 3.67 5.35 8.77 3.25
Cases in ±4% 90.3 65.9 100 96.8 74.5 100
Cases in ±2.5% 67.4 47.4 92.4 80.6 51.9 92.8
P75-P25 2.65 4.33 1.57 2.19 3.79 1.23
Count 1519 1519 1519 1519 1519 1519
In [12]:
#DIESELS dataframe with the NECD, UDC, EUDC error statistics
DerrorsDF = pd.DataFrame(index=['Averages','StdDev','Minimum','Maximum','Cases in ±4%','Cases in ±2.5%'], columns=['NEDC-H [%]','UDC-H [%]', 'EUDC-H [%]','NEDC-L [%]','UDC-L [%]', 'EUDC-L [%]'])
DerrorsDF.loc['Averages'] = pd.Series({'NEDC-H [%]':round(Diesels['NEDC-H error [%]'].mean(),2), 'UDC-H [%]':round(Diesels['UDC-H error [%]'].mean(),2), 'EUDC-H [%]':round(Diesels['EUDC-H error [%]'].mean(),2),'NEDC-L [%]':round(Diesels['NEDC-L error [%]'].mean(),2), 'UDC-L [%]':round(Diesels['UDC-L error [%]'].mean(),2), 'EUDC-L [%]':round(Diesels['EUDC-L error [%]'].mean(),2)})
DerrorsDF.loc['StdDev'] = pd.Series({'NEDC-H [%]':round(Diesels['NEDC-H error [%]'].std(),2), 'UDC-H [%]':round(Diesels['UDC-H error [%]'].std(),2), 'EUDC-H [%]':round(Diesels['EUDC-H error [%]'].std(),2),'NEDC-L [%]':round(Diesels['NEDC-L error [%]'].std(),2), 'UDC-L [%]':round(Diesels['UDC-L error [%]'].std(),2), 'EUDC-L [%]':round(Diesels['EUDC-L error [%]'].std(),2)})
DerrorsDF.loc['Minimum'] = pd.Series({'NEDC-H [%]':round(Diesels['NEDC-H error [%]'].min(),2), 'UDC-H [%]':round(Diesels['UDC-H error [%]'].min(),2), 'EUDC-H [%]':round(Diesels['EUDC-H error [%]'].min(),2),'NEDC-L [%]':round(Diesels['NEDC-L error [%]'].min(),2), 'UDC-L [%]':round(Diesels['UDC-L error [%]'].min(),2), 'EUDC-L [%]':round(Diesels['EUDC-L error [%]'].min(),2)})
DerrorsDF.loc['Maximum'] = pd.Series({'NEDC-H [%]':round(Diesels['NEDC-H error [%]'].max(),2), 'UDC-H [%]':round(Diesels['UDC-H error [%]'].max(),2), 'EUDC-H [%]':round(Diesels['EUDC-H error [%]'].max(),2),'NEDC-L [%]':round(Diesels['NEDC-L error [%]'].max(),2), 'UDC-L [%]':round(Diesels['UDC-L error [%]'].max(),2), 'EUDC-L [%]':round(Diesels['EUDC-L error [%]'].max(),2)})
DerrorsDF.loc['Cases in ±4%'] = pd.Series({'NEDC-H [%]':round(sum(abs(Diesels['NEDC-H error [%]'])<4)/len(Diesels['NEDC-H error [%]'])*100,1), 'UDC-H [%]':round(sum(abs(Diesels['UDC-H error [%]'])<4)/len(Diesels['UDC-H error [%]'])*100,1), 'EUDC-H [%]':round(sum(abs(Diesels['EUDC-H error [%]'])<4)/len(Diesels['EUDC-H error [%]'])*100,1),'NEDC-L [%]':round(sum(abs(Diesels['NEDC-L error [%]'])<4)/len(Diesels['NEDC-L error [%]'])*100,1), 'UDC-L [%]':round(sum(abs(Diesels['UDC-L error [%]'])<4)/len(Diesels['UDC-L error [%]'])*100,1), 'EUDC-L [%]':round(sum(abs(Diesels['EUDC-L error [%]'])<4)/len(Diesels['EUDC-L error [%]'])*100,1)})
DerrorsDF.loc['Cases in ±2.5%'] = pd.Series({'NEDC-H [%]':round(sum(abs(Diesels['NEDC-H error [%]'])<2.5)/len(Diesels['NEDC-H error [%]'])*100,1), 'UDC-H [%]':round(sum(abs(Diesels['UDC-H error [%]'])<2.5)/len(Diesels['UDC-H error [%]'])*100,1), 'EUDC-H [%]':round(sum(abs(Diesels['EUDC-H error [%]'])<2.5)/len(Diesels['EUDC-H error [%]'])*100,1),'NEDC-L [%]':round(sum(abs(Diesels['NEDC-L error [%]'])<2.5)/len(Diesels['NEDC-L error [%]'])*100,1), 'UDC-L [%]':round(sum(abs(Diesels['UDC-L error [%]'])<2.5)/len(Diesels['UDC-L error [%]'])*100,1), 'EUDC-L [%]':round(sum(abs(Diesels['EUDC-L error [%]'])<2.5)/len(Diesels['EUDC-L error [%]'])*100,1)})
DerrorsDF.loc['P75-P25'] = pd.Series({'NEDC-H [%]':round(Diesels['NEDC-H error [%]'].quantile(0.75)-Diesels['NEDC-H error [%]'].quantile(0.25),2), 'UDC-H [%]':round(Diesels['UDC-H error [%]'].quantile(0.75)-Diesels['UDC-H error [%]'].quantile(0.25),2), 'EUDC-H [%]':round(Diesels['EUDC-H error [%]'].quantile(0.75)-Diesels['EUDC-H error [%]'].quantile(0.25),2),'NEDC-L [%]':round(Diesels['NEDC-L error [%]'].quantile(0.75)-Diesels['NEDC-L error [%]'].quantile(0.25),2), 'UDC-L [%]':round(Diesels['UDC-L error [%]'].quantile(0.75)-Diesels['UDC-L error [%]'].quantile(0.25),2), 'EUDC-L [%]':round(Diesels['EUDC-L error [%]'].quantile(0.75)-Diesels['EUDC-L error [%]'].quantile(0.25),2)})
DerrorsDF.loc['Count'] = pd.Series({'NEDC-H [%]':round(Diesels['NEDC-H error [%]'].count(),2), 'UDC-H [%]':round(Diesels['UDC-H error [%]'].count(),2), 'EUDC-H [%]':round(Diesels['EUDC-H error [%]'].count(),2),'NEDC-L [%]':round(Diesels['NEDC-L error [%]'].count(),2), 'UDC-L [%]':round(Diesels['UDC-L error [%]'].count(),2), 'EUDC-L [%]':round(Diesels['EUDC-L error [%]'].count(),2)})
DerrorsDF.columns.name='DIESELS'
DerrorsDF
Out[12]:
DIESELS NEDC-H [%] UDC-H [%] EUDC-H [%] NEDC-L [%] UDC-L [%] EUDC-L [%]
Averages 0.41 0.98 -0.04 1.03 1.86 0.36
StdDev 2.74 3.36 2.83 2.18 3.28 1.88
Minimum -7.32 -7.98 -6.84 -4.97 -7.24 -3.68
Maximum 7.14 11.71 5.67 7.23 12.08 4.89
Cases in ±4% 82.9 75.7 80.6 88.4 72.4 98.5
Cases in ±2.5% 55.8 59.2 59 69.7 51.4 71.4
P75-P25 3.77 3.92 3.82 2.6 3.86 2.64
Count 977 977 977 977 977 977

Distribution of the NEDC, UDC and EUDC errors [%]

In [13]:
mydict = ([('NEDC', 0), ('UDC', 1), ('EUDC', 2)])
mydict = collections.OrderedDict(mydict)
for cycle in mydict:
    if cycle == 'NEDC':
        boxcolor = 'green'
    elif cycle == 'UDC':
        boxcolor = 'blue'
    else:
        boxcolor = 'red'
    # Create a figure instance
    fig = plt.figure(1, figsize=(14, 7))
    # Create an axes instance
    ax = fig.add_subplot(111)
    hist = valuesDF[cycle+'-H error [%]'].hist(bins=np.arange(min(valuesDF['UDC-H error [%]']), max(valuesDF['UDC-H error [%]']) + 0.5, 0.5), color=boxcolor, label = 'High', ec='black')
    hist = valuesDF[cycle+'-L error [%]'].hist(bins=np.arange(min(valuesDF['UDC-L error [%]']), max(valuesDF['UDC-L error [%]']) + 0.5, 0.5), color=boxcolor, alpha=0.5, label='Low', ec='black')
    hist.set_xlabel(cycle+" error [%]",fontsize=14)
    hist.set_ylabel("frequency",fontsize=14)
    hist.set_ylim(0,500)
    plt.title(cycle+' CO$_2$ emission error distribution', fontsize=20)
    plt.ylabel("frequency",fontsize=18)
    plt.tick_params(axis='x', which='major', labelsize=16)
    plt.tick_params(axis='y', which='major', labelsize=16)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    ax.set_xlim(-15, 15)
    plt.legend()
    plt.show()

Comparative emission error per driving cycle (%)

In [14]:
#Alternatively show boxplots
toboxplot = [valuesDF['NEDC-H error [%]'],valuesDF['NEDC-L error [%]'],valuesDF['UDC-H error [%]'],valuesDF['UDC-L error [%]'],
            valuesDF['EUDC-H error [%]'],valuesDF['EUDC-L error [%]']]
# Create a figure instance
fig = plt.figure(1, figsize=(14, 7))
# Create an axes instance
ax = fig.add_subplot(111)
# Create the boxplot with fill color
bp = ax.boxplot(toboxplot, sym='', patch_artist=True, whis=10000, showmeans=True, meanprops=(dict(marker='o',markerfacecolor='yellow')))
for box in bp['boxes']:
    # change outline color
    box.set( color='black', linewidth=1)
    # change fill color
    box.set( facecolor = '#b78adf' )
    ## Custom x-axis labels
ax.set_xticklabels(['NEDC-H','NEDC-L', 'UDC-H','UDC-L','EUDC-H','EUDC-L'],fontsize=20)
## Remove top axes and right axes ticks
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
#Set y axis title
plt.title('CO$_2$ emission error by driving cycle', fontsize=20)
plt.ylabel("error [%]",fontsize=18)
plt.tick_params(axis='y', which='major', labelsize=16)
ax.set_ylim(-20, 20)
plt.setp(bp['medians'], color = 'purple', linewidth = 2)
plt.show()
print('The purple box represents the 1st and 3rd quartile.\nThe dark purple line is the median.\nThe yellow dot is the mean.\nthe whiskers show the min and max values.')
The purple box represents the 1st and 3rd quartile.
The dark purple line is the median.
The yellow dot is the mean.
the whiskers show the min and max values.

Error statistics per technology type

In [15]:
#Print a dictionary with the tested technologies and their identification codes
tec = pd.DataFrame(index=['Base case','Gear configuration A','Gear configuration B','No Start/Stop','No Break energy recuperation','Variable valve lifting','Direct injection/Multipoint injection','Thermal management'])
tec['Technology code'] = ['BC','GCA','GCB','NOSS','NOBERS','VVL','DI/MPI','ThM']
tec.columns.name='Technology type'
tec
Out[15]:
Technology type Technology code
Base case BC
Gear configuration A GCA
Gear configuration B GCB
No Start/Stop NOSS
No Break energy recuperation NOBERS
Variable valve lifting VVL
Direct injection/Multipoint injection DI/MPI
Thermal management ThM
In [16]:
#Function that assigns the number of case to the specific technology tested for each vehicle model
def assign_technol_perCarAndCase(df):
    #looks for the case # in the input file and assigns a technology
    df_basecase = df[valuesDF['Case'] <= 27]
    df_gca = df[(valuesDF['Case'] > 27) & (valuesDF['Case'] <= 54)]
    df_gcb = df[(valuesDF['Case'] > 54) & (valuesDF['Case'] <= 81)]
    df_noss = df[(valuesDF['Case'] > 81) & (valuesDF['Case'] <= 108)]
    df_nobers = df[(valuesDF['Case'] > 108) & (valuesDF['Case'] <= 135)]
    #some vehicles have more possible technologies than others (long vs short) and an additional technology assignment is performed for the former group
    In_long = (valuesDF['Model'] == 'Vehicle04') | (valuesDF['Model'] == 'Vehicle02') | (valuesDF['Model'] == 'Vehicle10') | (valuesDF['Model'] == 'Vehicle06') | (valuesDF['Model'] == 'Vehicle11') | (valuesDF['Model'] == 'Vehicle09')| (valuesDF['Model'] == 'Vehicle05')
    In_short = (valuesDF['Model'] == 'Vehicle01') | (valuesDF['Model'] == 'Vehicle03') | (valuesDF['Model'] == 'Vehicle07') | (valuesDF['Model'] == 'Vehicle08') | (valuesDF['Model'] == 'Vehicle12')
    I_vvl = (valuesDF['Case'] >= 136) & (valuesDF['Case'] <= 162)
    df_vvl = df[In_long & I_vvl]
    I_dimpi = (valuesDF['Case'] >= 163) & (valuesDF['Case'] <= 189)
    df_dimpi = df[In_long & I_dimpi]
    I_short_tm = (valuesDF['Case'] >= 136)
    I_long_tm = (valuesDF['Case'] >= 190)
    I_tm = (In_short & I_short_tm) | (In_long & I_long_tm)
    df_tm = df[I_tm]
    #Append to the original DF a column with the technology IDcode
    pd.options.mode.chained_assignment = None  # default='warn'
    try:
        df_basecase.loc[:,'Tecno'] = 'BC'
    except:
        pass
    try:
        df_gca.loc[:,'Tecno'] = 'GCA'
    except:
        pass
    try:
        df_gcb.loc[:,'Tecno'] = 'GCB'
    except:
        pass
    try:
        df_noss.loc[:,'Tecno'] = 'NOSS'
    except:
        pass
    try:
        df_nobers.loc[:,'Tecno'] = 'NOBERS'
    except:
        pass
    try:
        df_vvl.loc[:,'Tecno'] = 'VVL'
    except:
        pass
    try:
        df_dimpi.loc[:,'Tecno'] = 'DI/MPI'
    except:
        pass
    try:
        df_tm.loc[:,'Tecno'] = 'ThM'
    except:
        pass
    bigdata = pd.concat([df_basecase,df_gca,df_gcb,df_noss,df_nobers,df_vvl,df_dimpi,df_tm], ignore_index=False)
    return bigdata
In [17]:
#Plot the NEDC errors per technology type in a boxplot
tech = assign_technol_perCarAndCase(valuesDF)
techBC = tech[tech['Tecno'] == 'BC']
techGCA = tech[tech['Tecno'] == 'GCA']
techGCB = tech[tech['Tecno'] == 'GCB']
techNOSS = tech[tech['Tecno'] == 'NOSS']
techBERS = tech[tech['Tecno'] == 'NOBERS']
techVVL = tech[tech['Tecno'] == 'VVL']
techDIMPI = tech[tech['Tecno'] == 'DI/MPI']
techThM = tech[tech['Tecno'] == 'ThM']
mydict = ([('NEDC', 0), ('UDC', 1), ('EUDC', 2)])
mydict = collections.OrderedDict(mydict)
for cycle in mydict:
    techboxplot = [techBC[cycle+'-H error [%]'],techGCA[cycle+'-H error [%]'],techGCB[cycle+'-H error [%]'],techNOSS[cycle+'-H error [%]'],techBERS[cycle+'-H error [%]'],techVVL[cycle+'-H error [%]'],techDIMPI[cycle+'-H error [%]'],techThM[cycle+'-H error [%]']]
    if cycle == 'NEDC':
        boxcolor = 'green'
    elif cycle == 'UDC':
        boxcolor = 'blue'
    else:
        boxcolor = 'red'
    # Create a figure instance
    fig = plt.figure(1, figsize=(14, 7))
    # Create an axes instance
    ax = fig.add_subplot(111)
    # Create the boxplot with fill color
    bp = ax.boxplot(techboxplot, sym='', patch_artist=True, whis=10000, showmeans=True, meanprops=(dict(marker='o',markerfacecolor='yellow')))
    for box in bp['boxes']:
        # change outline color
        box.set( color='black', linewidth=1)
        # change fill color
        box.set(facecolor = boxcolor)            
        ## Custom x-axis labels
    ax.set_xticklabels(['BC', 'GCA', 'GCB','NOSS','NOBERS','VVL','DI/MPI','ThM'],fontsize=20)
    ## Remove top axes and right axes ticks
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    #Set y axis title
    plt.title(cycle+' CO$_2$ emission error by technology type', fontsize=20)
    plt.ylabel("error [%]",fontsize=18)
    plt.tick_params(axis='y', which='major', labelsize=18)
    ax.set_ylim(-20, 20)
    plt.setp(bp['medians'], color = 'purple', linewidth = 2)
    plt.show()
    print('The green box represents the 1st and 3rd quartile.\nThe dark purple line is the median.\nThe yellow dot is the mean.\nthe whiskers show the min and max values.')
    print('\nDescriptive statistics for '+cycle+' CO2 emission error per technology type')
    grouped = tech.groupby('Tecno')
    gmean = grouped[cycle+'-H error [%]'].mean()
    gsem = grouped[cycle+'-H error [%]'].sem()
    gmedian = grouped[cycle+'-H error [%]'].median()
    gstd = grouped[cycle+'-H error [%]'].std()
    gvar = grouped[cycle+'-H error [%]'].var()
    gskew = grouped[cycle+'-H error [%]'].skew()
    grange = (grouped[cycle+'-H error [%]'].max()-grouped[cycle+'-H error [%]'].min())
    gmin = grouped[cycle+'-H error [%]'].min()
    gmax = grouped[cycle+'-H error [%]'].max()
    gsum = grouped[cycle+'-H error [%]'].sum()
    gcount = grouped[cycle+'-H error [%]'].count()
    gCI95 = 2*grouped[cycle+'-H error [%]'].sem()
    errorsTec = pd.DataFrame(index=['Averages','StdError','Median','StdDev','Variance','Kurtosis','Skweness','Range','Minimum','Maximum','Sum','Count','Confidence level (95%)'], columns=['BC','GCA', 'GCB','NOSS','NOBERS','VVL','DI/MPI','ThM'])
    errorsTec.loc['Averages'] = pd.Series.round(gmean,2)
    errorsTec.loc['StdError'] = pd.Series.round(gsem,2)
    errorsTec.loc['Median'] = pd.Series.round(gmedian,2)
    errorsTec.loc['StdDev'] = pd.Series.round(gstd,2)
    errorsTec.loc['Variance'] = pd.Series.round(gvar,2)
    errorsTec.loc['Kurtosis'] = [round(techBC[cycle+'-H error [%]'].kurtosis(),2),round(techGCA[cycle+'-H error [%]'].kurtosis(),2),round(techGCB[cycle+'-H error [%]'].kurtosis(),2),round(techNOSS[cycle+'-H error [%]'].kurtosis(),2),round(techBERS[cycle+'-H error [%]'].kurtosis(),2),round(techVVL[cycle+'-H error [%]'].kurtosis(),2),round(techDIMPI[cycle+'-H error [%]'].kurtosis(),2),round(techThM[cycle+'-H error [%]'].kurtosis(),2)]
    errorsTec.loc['Skweness'] = pd.Series.round(gskew,2)
    errorsTec.loc['Range'] = pd.Series.round(grange,2)
    errorsTec.loc['Minimum'] = pd.Series.round(gmin,2)
    errorsTec.loc['Maximum'] = pd.Series.round(gmax,2)
    errorsTec.loc['Sum'] = pd.Series.round(gsum)
    errorsTec.loc['Count'] = pd.Series.round(gcount)
    errorsTec.loc['Confidence level (95%)'] = pd.Series.round(gCI95,2)
    errorsTec.columns.name=cycle+' error [%]'
    display(errorsTec)
The green box represents the 1st and 3rd quartile.
The dark purple line is the median.
The yellow dot is the mean.
the whiskers show the min and max values.

Descriptive statistics for NEDC CO2 emission error per technology type
NEDC error [%] BC GCA GCB NOSS NOBERS VVL DI/MPI ThM
Averages 0.08 -0.01 0.24 0.71 -2.34 -1.28 -0.45 0.41
StdError 0.12 0.13 0.13 0.14 0.12 0.2 0.12 0.12
Median -0.21 -0.3 -0.2 0.2 -2.88 -1.89 -0.31 0.09
StdDev 2.21 2.37 2.4 2.56 2.27 2.69 1.67 2.17
Variance 4.9 5.63 5.75 6.53 5.14 7.25 2.79 4.71
Kurtosis -0.19 -0.31 -0.57 -0.45 0.34 -0.74 -0 -0.14
Skweness 0.45 0.57 0.54 0.75 0.83 0.67 0.59 0.22
Range 10.59 10.86 10.35 10.28 12 9.43 6.98 10.27
Minimum -5.18 -4.49 -3.69 -3.14 -7.32 -5.02 -2.93 -4.99
Maximum 5.42 6.38 6.66 7.14 4.68 4.41 4.05 5.28
Sum 30 -4 83 249 -823 -241 -85 132
Count 364 351 351 351 351 189 189 324
Confidence level (95%) 0.23 0.25 0.26 0.27 0.24 0.39 0.24 0.24
The green box represents the 1st and 3rd quartile.
The dark purple line is the median.
The yellow dot is the mean.
the whiskers show the min and max values.

Descriptive statistics for UDC CO2 emission error per technology type
UDC error [%] BC GCA GCB NOSS NOBERS VVL DI/MPI ThM
Averages 0.2 0.05 0.04 1.61 -4.38 -2.14 -0.92 0.67
StdError 0.15 0.18 0.16 0.21 0.16 0.3 0.2 0.15
Median -0.25 -0.75 -0.48 1.09 -5.16 -3.65 -0.53 0.07
StdDev 2.96 3.38 3.09 3.91 2.94 4.18 2.69 2.73
Variance 8.74 11.43 9.52 15.26 8.63 17.46 7.25 7.48
Kurtosis -0.15 0.23 0.53 -0.59 -0.42 -0.54 -0.42 -0.22
Skweness 0.51 0.69 0.56 0.33 0.66 0.86 0.26 0.73
Range 13.68 16.95 17.12 17.44 13.01 15.5 11.54 12.83
Minimum -5.54 -6.19 -6.35 -5.73 -8.82 -8.03 -5.69 -4.72
Maximum 8.14 10.77 10.76 11.71 4.18 7.47 5.85 8.11
Sum 71 18 14 567 -1536 -404 -174 216
Count 364 351 351 351 351 189 189 324
Confidence level (95%) 0.31 0.36 0.33 0.42 0.31 0.61 0.39 0.3
The green box represents the 1st and 3rd quartile.
The dark purple line is the median.
The yellow dot is the mean.
the whiskers show the min and max values.

Descriptive statistics for EUDC CO2 emission error per technology type
EUDC error [%] BC GCA GCB NOSS NOBERS VVL DI/MPI ThM
Averages 0 -0.03 0.4 -0 -0.61 -0.52 -0.05 0.2
StdError 0.11 0.11 0.12 0.11 0.12 0.12 0.08 0.12
Median -0.08 -0.14 -0.11 -0.21 -1.06 -0.62 0 0.22
StdDev 2.06 2.12 2.25 2.06 2.22 1.62 1.04 2.09
Variance 4.25 4.5 5.06 4.24 4.91 2.62 1.09 4.37
Kurtosis 1.67 0.19 -0.47 1.43 1.5 -1.26 0.11 2.15
Skweness -0.93 -0.54 -0.07 -0.76 0.17 0.18 0.7 -1.25
Range 9.93 9.58 9.75 9.79 12.51 5.4 4.86 9.82
Minimum -6.36 -5.95 -5.05 -6.09 -6.84 -2.83 -1.69 -6.4
Maximum 3.56 3.63 4.7 3.7 5.67 2.58 3.17 3.42
Sum 1 -9 142 -0 -215 -98 -10 64
Count 364 351 351 351 351 189 189 324
Confidence level (95%) 0.22 0.23 0.24 0.22 0.24 0.24 0.15 0.23

Section 2. Performance of the model. Statistics per vehicle model and case test.

Glossary of vehicle models and number of test cases considered in the report

In [18]:
mod_cases_stats = valuesDF.groupby(['Model'],as_index=False).count() 
cols = mod_cases_stats.columns.tolist()
cols = cols[-1:-1] + cols[:2]
mod_cases_stats = mod_cases_stats[cols]
mod_cases_stats
Out[18]:
Model NEDC
0 Vehicle01 163
1 Vehicle02 217
2 Vehicle03 163
3 Vehicle04 217
4 Vehicle05 217
5 Vehicle06 217
6 Vehicle07 163
7 Vehicle08 163
8 Vehicle09 217
9 Vehicle10 217
10 Vehicle11 217
11 Vehicle12 163
12 Vehicle13 162

NEDC, UDC, and EUDC CO$_2$ emission error per vehicle model

In [19]:
#In order to create statistic tables and plots for each model car, a numeric car ID 'cid' has to be assigned to each vehicle
tech = assign_technol_perCarAndCase(valuesDF)
Carlist = list(sorted(tech['Model'].unique()))
Cidlist = list(range(len(Carlist)))
tech.cid = tech['Model'].replace(Carlist, Cidlist, regex = True)
tech['cod'] = tech.cid
dictecnos = {'BC':'o', 'GCA':'s', 'GCB':'v', 'NOSS':'p','NOBERS':'D','VVL':'4','DI/MPI':'+','ThM':'*'}
#Create a table with the error statistics for each car model
for x in Carlist:
    Car = tech[tech['Model'] == x]
    grouped = Car.groupby('Tecno')
    CarDF = pd.DataFrame(index=['Averages','Median', 'StdDev'], columns=['NEDC [gCO$_2$ km$^{-1}$]','UDC [gCO$_2$ km$^{-1}$]', 'EUDC [gCO$_2$ km$^{-1}$]'])
    CarDF.loc['Averages'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(Car.dNEDC.mean(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(Car.dUDC.mean(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(Car.dEUDC.mean(),2)})
    CarDF.loc['Median'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(Car.dNEDC.median(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(Car.dUDC.median(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(Car.dEUDC.median(),2)})
    CarDF.loc['StdDev'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(Car.dNEDC.std(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(Car.dUDC.std(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(Car.dEUDC.std(),2)})
    CarDF.columns.name=Car.iat[0,13]
    display(CarDF)
    pCarDF = pd.DataFrame(index=['Averages','Median', 'StdDev'], columns=['NEDC-H [%]','NEDC-L [%]'])
    pCarDF.loc['Averages'] = pd.Series({'NEDC-H [%]':round(Car['NEDC-H error [%]'].mean(),2), 'NEDC-L [%]':round(Car['NEDC-L error [%]'].mean(),2)})
    pCarDF.loc['Median'] = pd.Series({'NEDC-H [%]':round(Car['NEDC-H error [%]'].median(),2), 'NEDC-L [%]':round(Car['NEDC-L error [%]'].median(),2)})
    pCarDF.loc['StdDev'] = pd.Series({'NEDC-H [%]':round(Car['NEDC-H error [%]'].std(),2), 'NEDC-L [%]':round(Car['NEDC-L error [%]'].std(),2)})
    pCarDF.columns.name=Car.iat[0,13]
    display(pCarDF)
    #plot the CO2 emission error histogram per vehicle model and cycle
    mydict = ([('NEDC', 0), ('UDC', 1), ('EUDC', 2)])
    mydict = collections.OrderedDict(mydict)
    for cycle in mydict:
        if cycle == 'NEDC':
            boxcolor = 'green'
        elif cycle == 'UDC':
            boxcolor = 'blue'
        else:
            boxcolor = 'red'
        fig = plt.figure(1, figsize=(14, 7))
        plt.title(Car.iat[0,13],fontsize=20)
        plot = fig.add_subplot(111)
        plot.tick_params(axis='x', which='major', labelsize=14)
        plot.tick_params(axis='y', which='major', labelsize=14)
        plot.set_xlim(-15, 15)
        plot.get_xaxis().tick_bottom()
        plot.get_yaxis().tick_left()
        car_hist = Car[cycle+'-H error [%]'].hist(bins=25, color=boxcolor, ec='black')
        car_hist.set_xlabel(cycle+" CO$_2$ emission error [%]",fontsize=20)
        car_hist.set_ylabel("frequency",fontsize=20)
        plt.show()
    #plot the emission error per case, model, and cycle
        fig = plt.figure(1, figsize=(14, 7))
        plt.title(Car.iat[0,13],fontsize=20)
        plot = fig.add_subplot(111)
        plot.tick_params(axis='x', which='major', labelsize=14)
        plot.tick_params(axis='y', which='major', labelsize=14)
        plot.set_xlim(0, 220)
        plot.set_ylim(-20,20)
        plot.get_xaxis().tick_bottom()
        plot.get_yaxis().tick_left()
        for key, group in grouped:
            plt.plot(group['Case'], group[cycle+'-H error [%]'], color=boxcolor, marker=dictecnos[key], label = key, linestyle='')
            first_legend = plt.legend(numpoints=1, bbox_to_anchor=(1.0, 1.), loc=1, borderaxespad=0.)
            plot.ax = plt.gca().add_artist(first_legend)
        plot.set_xlabel("Case #",fontsize=20)
        plot.set_ylabel(cycle+" error [%]",fontsize=20)
        line1 = plot.axhline(y=-2.5, color='grey', linestyle='-.', label='± 2.5 gCO$_2$ km$^{-1}$')
        line2 = plot.axhline(y=2.5, color='grey', linestyle='-.')
        line3 = plot.axhline(y=-4, color='black', linestyle='--', label='± 4.0 gCO$_2$ km$^{-1}$')
        line4 = plot.axhline(y=4, color='black', linestyle='--')
        plt.legend(handles=[line1, line3], loc = 3)
        plt.show()
Vehicle01 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages 4.81 7.58 3.14
Median 4.8 7.41 3.15
StdDev 1.45 3.58 0.94
Vehicle01 NEDC-H [%] NEDC-L [%]
Averages 4.13 4.03
Median 4.19 4.1
StdDev 1.22 1.47
Vehicle02 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages -0.1 0.88 -0.74
Median 0.08 1.55 -0.74
StdDev 1.47 2.98 0.83
Vehicle02 NEDC-H [%] NEDC-L [%]
Averages -0.06 0.32
Median 0.06 0.38
StdDev 1.01 0.97
Vehicle03 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages -0.6 -1.31 -0.23
Median -0.25 -0.88 -0.03
StdDev 1.98 4.57 0.55
Vehicle03 NEDC-H [%] NEDC-L [%]
Averages -0.5 -0.14
Median -0.22 0.17
StdDev 1.64 1.51
Vehicle04 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages -1.24 -2.85 -0.3
Median -0.94 -2.22 -0.22
StdDev 1.34 3.11 0.44
Vehicle04 NEDC-H [%] NEDC-L [%]
Averages -1.08 -0.6
Median -0.83 -0.41
StdDev 1.16 1.05
Vehicle05 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages -3.11 -6.24 -1.29
Median -2.75 -5.44 -1.11
StdDev 1.4 2.81 0.65
Vehicle05 NEDC-H [%] NEDC-L [%]
Averages -2.76 -1.59
Median -2.43 -1.3
StdDev 1.2 0.9
Vehicle06 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages -2.77 -6.83 -0.4
Median -2.84 -6.79 -0.46
StdDev 1.2 2.39 0.64
Vehicle06 NEDC-H [%] NEDC-L [%]
Averages -2.41 -1.82
Median -2.46 -1.76
StdDev 1.02 0.88
Vehicle07 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages -0.34 -1.74 0.45
Median 0.06 -0.95 0.42
StdDev 1.23 2.72 0.71
Vehicle07 NEDC-H [%] NEDC-L [%]
Averages -0.25 -0.37
Median 0.05 -0.11
StdDev 0.92 0.98
Vehicle08 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages 3.61 4.28 3.19
Median 3.76 4.08 3.32
StdDev 2.05 4.91 0.75
Vehicle08 NEDC-H [%] NEDC-L [%]
Averages 2.77 2.59
Median 2.88 2.66
StdDev 1.55 1.55
Vehicle09 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages -0.44 -2.72 0.89
Median 0.31 -1.39 0.87
StdDev 2.18 4.03 1.41
Vehicle09 NEDC-H [%] NEDC-L [%]
Averages -0.3 0.2
Median 0.23 0.51
StdDev 1.57 1.38
Vehicle10 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages -2.25 -4.31 -1.04
Median -1.58 -2.83 -0.79
StdDev 1.81 3.79 0.88
Vehicle10 NEDC-H [%] NEDC-L [%]
Averages -1.51 -1.26
Median -1.05 -0.98
StdDev 1.2 0.99
Vehicle11 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages 4.4 7.52 2.58
Median 4.57 8.09 2.53
StdDev 2.06 4.61 0.89
Vehicle11 NEDC-H [%] NEDC-L [%]
Averages 3.19 2.8
Median 3.28 2.91
StdDev 1.51 1.43
Vehicle12 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages -7.32 -2.54 -10.1
Median -7.22 -2.71 -10.13
StdDev 2.81 6.6 1.36
Vehicle12 NEDC-H [%] NEDC-L [%]
Averages -3.37 -0.75
Median -3.31 -0.88
StdDev 1.28 1.36
Vehicle13 NEDC [gCO$_2$ km$^{-1}$] UDC [gCO$_2$ km$^{-1}$] EUDC [gCO$_2$ km$^{-1}$]
Averages -0.74 2.22 -2.44
Median -0.68 2.53 -2.88
StdDev 1.91 3.34 1.79
Vehicle13 NEDC-H [%] NEDC-L [%]
Averages -0.31 0.84
Median -0.29 0.83
StdDev 0.8 0.99