# path = r'C:/Users/dimit/Desktop/20191107_213830-summary_4.1.10.xlsx'
# # path = r'C:/Users/dimit/Desktop/20190729_121722-summary_v3.0.0.xlsx'
# list_header = [0, 1, 2, 3,]
# if '4.1.10' in path:
# list_header = [0, 1, 2, 3, 4]
# df = pd.read_excel(path, 'summary', header=list_header, index_col=[0], skiprows=[4])
# files = list(df.index)
# path_input = 'C:/Work/co2mpasdb/inputs/'
# input_names = os.listdir(path_input)
# dict_input_names = dict()
# for f in input_names:
# nn = f.replace(' ', '_').split('.xlsx')[0].split('.XLSX')[0].replace('(', '').replace(')', '')
# dict_input_names[nn] = f.split('.xlsx')[0].split('.XLSX')[0]
# import os
# gb_dict = dict()
# mt, at, cvt = 1, 1, 1
# for f in files:
# f_name = f
# if '4.1.10' in path:
# f_name = dict_input_names[f.replace(' ', '_')]
# _df = pd.read_excel(path_input + f_name + '.xlsx', 'Inputs')[['Name', 'Value']].dropna()
# v = _df.loc[_df['Name']=='gear_box_type', 'Value'].values[0]
# if v == 'manual':
# gb_dict[f] = ['MT_' + ('0' + str(mt))[-2:]]
# mt += 1
# if v == 'automatic':
# gb_dict[f] = ['AT_' + ('0' + str(at))[-2:]]
# at += 1
# if v == 'CVT':
# gb_dict[f] = ['CVT_' + ('0' + str(cvt))[-2:]]
# cvt += 1
# pd.DataFrame.from_dict(gb_dict, orient='index', columns=['gb']).to_excel(path.split('.xlsx')[0]+'_gb_type.xlsx')
import IPython.core.display as di
# Script injected into the output: when the page body is not the live notebook
# app (i.e. an HTML export), toggle the code inputs and prompts once on load so
# the code starts hidden.
hide_code_script = '<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>'
di.display_html(hide_code_script, raw=True)
# Button that toggles the visibility of code inputs and prompts on demand, for
# use with the HTML export version.
toggle_button_html = '''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Show/Hide code</button>'''
di.display_html(toggle_button_html, raw=True)
# Allow the created content to be plotted interactively inline
%matplotlib inline
#Establish width and height for all plots in the report
#pylab.rcParams['figure.figsize'] = (18, 6) #width, height
#Import needed libraries
import os
from os.path import join, getsize
import pandas as pd
from cycler import cycler
import matplotlib.pyplot as plt
from IPython.display import display
import collections
import numpy as np
import matplotlib as mpl
inline_rc = dict(mpl.rcParams)
pd.options.display.max_rows=1000
#the next cell enables plotting tables without borders
# To hide warnings
import warnings
warnings.filterwarnings('ignore')
%%html
<style>
table,td,tr,th {border:none!important}
</style>
# Folder and file name of the CO2MPAS summary output workbook.
folder = r'd:/Documents/CO2MPAS/co2mpas_validation_reports/'
file = '20190729_121722-summary_v3.0.0_ANON.xlsx'
infile = os.path.join(folder, file)
# Read the 'ANON' sheet: the first four rows form a multi-level column header,
# the first column becomes the row index, and row 4 (0-based) is skipped.
df = pd.read_excel(
    infile,
    sheet_name='ANON',
    header=[0, 1, 2, 3],
    index_col=[0],
    skiprows=[4],
)
# Pull the series used in the report out of the multi-level summary columns.
# Naming: no suffix = nedc_h output, 'l' = nedc_l output,
#         't' = nedc_h target, 'tl' = nedc_l target.
pred_h = df['nedc_h']['prediction']
pred_l = df['nedc_l']['prediction']
out_h, tgt_h = pred_h['output'], pred_h['target']
out_l, tgt_l = pred_l['output'], pred_l['target']

# Combined-cycle values; a missing target 'declared_value' falls back to the
# target 'value' column.
NEDC = out_h['declared_value']
NEDCl = out_l['declared_value']
NEDCt = tgt_h['declared_value'].fillna(tgt_h['value'])
NEDCtl = tgt_l['declared_value'].fillna(tgt_l['value'])

# UDC phase.
UDC = out_h['UDC']
UDCl = out_l['UDC']
UDCt = tgt_h['UDC']
UDCtl = tgt_l['UDC']

# EUDC phase.
EUDC = out_h['EUDC']
EUDCl = out_l['EUDC']
EUDCt = tgt_h['EUDC']
EUDCtl = tgt_l['EUDC']
# Derive the vehicle model from the row label: everything before '_test'.
df['vehicle'] = df.index
model = df['vehicle'].str.split('_test').str[0]

# Collect outputs, targets and absolute deviations (output - target) per row.
# Columns prefixed with 'd' are deviations; a trailing 'l' marks the nedc_l
# series, no suffix the nedc_h series.
valuesDF = pd.DataFrame({
    'NEDC': NEDC,
    'NEDCt': NEDCt,
    'NEDCtl': NEDCtl,
    'dNEDC': NEDC - NEDCt,
    'dNEDCl': NEDCl - NEDCtl,
    'UDC': UDC,
    'UDCt': UDCt,
    'UDCtl': UDCtl,
    'dUDC': UDC - UDCt,
    'dUDCl': UDCl - UDCtl,
    'EUDC': EUDC,
    'EUDCt': EUDCt,
    'EUDCtl': EUDCtl,
    'dEUDC': EUDC - EUDCt,
    'dEUDCl': EUDCl - EUDCtl,
    'Model': model,
})

# Relative errors in percent: deviation / target * 100.
# Rows with a missing output or target keep NaN here; the statistics and plots
# that consume these columns skip or drop NaN themselves. (Re-assigning
# `column.dropna()` to the same column would not remove anything, because the
# assignment re-aligns on the index and restores the NaN entries.)
for cycle in ('NEDC', 'UDC', 'EUDC'):
    valuesDF[cycle + '-H error [%]'] = (
        valuesDF['d' + cycle] / valuesDF[cycle + 't'] * 100
    )
for cycle in ('NEDC', 'UDC', 'EUDC'):
    valuesDF[cycle + '-L error [%]'] = (
        valuesDF['d' + cycle + 'l'] / valuesDF[cycle + 'tl'] * 100
    )
# Descriptive statistics of the absolute NEDC, UDC and EUDC deviations.
# Maps each displayed column label to the valuesDF deviation column it
# summarises.
abs_error_sources = {
    'NEDC-H [gCO$_2$ km$^{-1}$]': 'dNEDC',
    'UDC-H [gCO$_2$ km$^{-1}$]': 'dUDC',
    'EUDC-H [gCO$_2$ km$^{-1}$]': 'dEUDC',
    'NEDC-L [gCO$_2$ km$^{-1}$]': 'dNEDCl',
    'UDC-L [gCO$_2$ km$^{-1}$]': 'dUDCl',
    'EUDC-L [gCO$_2$ km$^{-1}$]': 'dEUDCl',
}
# (row label, statistic) pairs in display order; every statistic takes one
# deviation Series and returns the value shown in the table.
abs_error_stats = [
    ('Averages', lambda s: round(s.mean(), 2)),
    ('StdError', lambda s: round(s.sem(), 2)),
    ('Median', lambda s: round(s.median(), 2)),
    ('StdDev', lambda s: round(s.std(), 2)),
    ('Variance', lambda s: round(s.var(), 2)),
    ('Kurtosis', lambda s: round(s.kurtosis(), 2)),
    ('Skweness', lambda s: round(s.skew(), 2)),
    ('Range', lambda s: round((s.max() - s.min()), 2)),
    ('Minimum', lambda s: round(s.min(), 2)),
    ('Maximum', lambda s: round(s.max(), 2)),
    ('Sum', lambda s: round(s.sum(), 2)),
    ('Count', lambda s: round(s.count(), 2)),
    # Twice the (already rounded) standard error of the mean.
    ('Confidence level (95%)', lambda s: 2 * round(s.sem(), 2)),
]
errorsDF = pd.DataFrame(
    index=[row_label for row_label, _ in abs_error_stats],
    columns=list(abs_error_sources),
)
for row_label, statistic in abs_error_stats:
    errorsDF.loc[row_label] = pd.Series({
        col_label: statistic(valuesDF[source])
        for col_label, source in abs_error_sources.items()
    })
errorsDF
Error statistics for NEDC, UDC, and EUDC CO2 emission [%]
# Statistics of the relative NEDC, UDC and EUDC errors [%].
# Maps each displayed column label to the valuesDF error column it summarises.
pct_error_sources = {
    'NEDC-H [%]': 'NEDC-H error [%]',
    'UDC-H [%]': 'UDC-H error [%]',
    'EUDC-H [%]': 'EUDC-H error [%]',
    'NEDC-L [%]': 'NEDC-L error [%]',
    'UDC-L [%]': 'UDC-L error [%]',
    'EUDC-L [%]': 'EUDC-L error [%]',
}


def _share_within(errors, limit):
    """Return the percentage of non-missing errors with |error| < limit."""
    present = errors.dropna()
    return round(sum(abs(present) < limit) / len(present) * 100, 1)


# (row label, statistic) pairs in display order.
pct_error_stats = [
    ('Averages', lambda s: round(s.mean(), 2)),
    ('StdDev', lambda s: round(s.std(), 2)),
    ('Minimum', lambda s: round(s.min(), 2)),
    ('Maximum', lambda s: round(s.max(), 2)),
    ('Cases in ±4%', lambda s: _share_within(s, 4)),
    ('Cases in ±2.5%', lambda s: _share_within(s, 2.5)),
    # Interquartile range; this row is appended to the frame on assignment.
    ('P75-P25', lambda s: round(s.quantile(0.75) - s.quantile(0.25), 2)),
]
errorsDF = pd.DataFrame(
    index=['Averages', 'StdDev', 'Minimum', 'Maximum', 'Cases in ±4%', 'Cases in ±2.5%'],
    columns=list(pct_error_sources),
)
for row_label, statistic in pct_error_stats:
    errorsDF.loc[row_label] = pd.Series({
        col_label: statistic(valuesDF[source])
        for col_label, source in pct_error_sources.items()
    })
errorsDF
Distribution of the NEDC, UDC and EUDC errors [%]
# Draw one figure per driving cycle with the High and Low error [%]
# distributions overlaid as semi-transparent histograms.
mydict = collections.OrderedDict([('NEDC', 0), ('UDC', 1), ('EUDC', 2)])
# [High, Low] bar colours for each cycle.
hist_colors = {
    'NEDC': ['forestgreen', 'limegreen'],
    'UDC': ['darkblue', 'blue'],
    'EUDC': ['red', 'coral'],
}
# np.arange leaves out its stop value, so stop at 16 to get a final bin edge
# at +15; the 1%-wide bins then cover the whole -15..15 range shown on the
# x-axis instead of ending at +14.
error_bins = np.arange(-15, 16, 1)
for cycle in mydict:
    boxcolor = hist_colors[cycle]
    # Create a figure instance
    fig = plt.figure(1, figsize=(14, 7))
    # Create an axes instance
    ax = fig.add_subplot(111)
    valuesDF[cycle + '-H error [%]'].hist(
        ax=ax, bins=error_bins, color=boxcolor[0], alpha=0.5,
        label='High', ec='black')
    valuesDF[cycle + '-L error [%]'].hist(
        ax=ax, bins=error_bins, color=boxcolor[1], alpha=0.5,
        label='Low', ec='black')
    ax.set_title(cycle + ' CO$_2$ emission error distribution', fontsize=20)
    ax.set_xlabel(cycle + " error [%]", fontsize=14)
    ax.set_ylabel("frequency", fontsize=18)
    ax.set_xlim(-15, 15)
    ax.set_ylim(0, 12)
    ax.tick_params(axis='x', which='major', labelsize=16)
    ax.tick_params(axis='y', which='major', labelsize=16)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    ax.legend()
    plt.show()
NEDC, UDC, and EUDC CO2 emission error [%] per vehicle model
# Give every row a numeric x-position ("vehicle #") derived from its model
# name. Carlist holds the model of each row in row order and Cidlist the
# matching row positions 0..n-1.
Carlist = list(valuesDF['Model'])
Cidlist = list(range(len(Carlist)))
# Explicit model -> position lookup. When a model name occurs more than once,
# the dict keeps the position of its last occurrence, so all rows of that
# model share one x-position.
model_to_cid = dict(zip(Carlist, Cidlist))
# Store the ids as real columns (item assignment); plain attribute assignment
# on a DataFrame does not create a column. `valuesDF.cid` still resolves, now
# to the 'cid' column.
valuesDF['cid'] = valuesDF['Model'].map(model_to_cid)
valuesDF['cod'] = valuesDF['cid']
# Table of the mean error [%] per vehicle model, rounded to two decimals.
# A single groupby replaces the per-row filtering loop; sort=False keeps the
# models in order of first appearance.
car_error_columns = [
    'NEDC-H error [%]', 'UDC-H error [%]', 'EUDC-H error [%]',
    'NEDC-L error [%]', 'UDC-L error [%]', 'EUDC-L error [%]',
]
CarDF = (
    valuesDF.groupby('Model', sort=False)[car_error_columns]
    .mean()
    .round(2)
)
# Drop the 'Model' index title that groupby adds, so the displayed table has
# only the 'Error' column header.
CarDF.index.name = None
CarDF.columns.name = 'Error'
display(CarDF)
# Scatter plot of the error [%] of every vehicle, one figure per cycle and
# configuration, with the model name annotated next to each point and
# reference lines at ±2.5% and ±4%.
mydict = collections.OrderedDict([
    ('NEDC-H', 0), ('UDC-H', 1), ('EUDC-H', 2),
    ('NEDC-L', 3), ('UDC-L', 4), ('EUDC-L', 5),
])
# Marker colour per cycle; High and Low share the colour of their cycle.
scatter_colors = {
    'NEDC-H': 'green', 'NEDC-L': 'green',
    'UDC-H': 'blue', 'UDC-L': 'blue',
    'EUDC-H': 'red', 'EUDC-L': 'red',
}
for cycle in mydict:
    boxcolor = scatter_colors[cycle]
    error_column = cycle + ' error [%]'
    # Keep only the rows that have a model, a position and an error value.
    _valuesDF = valuesDF[['Model', 'cod', error_column]].dropna()
    labels = ['{0}'.format(i) for i in _valuesDF['Model']]
    # Create the axes first so that the title and every later call act on the
    # same Axes object.
    fig = plt.figure(1, figsize=(14, 7))
    plot = fig.add_subplot(111)
    plot.set_title(cycle + " error [%] per real vehicle", fontsize=20)
    plot.tick_params(axis='x', which='major', labelsize=14)
    plot.tick_params(axis='y', which='major', labelsize=14)
    plot.set_xlim(-1, 65)
    plot.set_ylim(-12, 12)
    plot.get_xaxis().tick_bottom()
    plot.get_yaxis().tick_left()
    plot.scatter(_valuesDF['cod'], _valuesDF[error_column], color=boxcolor, marker='o')
    # NaN rows were dropped above, so the error values can be used directly.
    for label, x, y in zip(labels, _valuesDF['cod'], _valuesDF[error_column]):
        plot.annotate(label, xy=(x, y), size=12)
    plot.set_xlabel("Vehicle #", fontsize=20)
    plot.set_ylabel(cycle + " error [%]", fontsize=20)
    # Only one line of each ± pair carries a label, so the legend shows each
    # tolerance band once.
    line1 = plot.axhline(y=-2.5, color='grey', linestyle='-.', label='± 2.5%')
    line2 = plot.axhline(y=2.5, color='grey', linestyle='-.')
    line3 = plot.axhline(y=-4, color='black', linestyle='--', label='± 4.0%')
    line4 = plot.axhline(y=4, color='black', linestyle='--')
    plot.legend(handles=[line1, line3], loc=3)
    plt.show()