TSSFL Stack Live ShowCase During Sabasaba International Trade Fair In Dar Es Salaam, Tanzania
Posted: Mon Jul 05, 2021 8:12 pm
TSSFL ODF Live ShowCase During Sabasaba International Trade Fair In Dar Es Salaam, Tanzania, From 28th June - 13th July 2021
Visualizing the Global 2019 GDP Per Capita, Life Expectancy, and other Social Factors Dataset
Below we show how to use data science tools, in particular the Python programming language, to visualize the relationship between economic and social factors. We use this dataset, which features GDP per capita, social support, healthy life expectancy, freedom to make choices, generosity, and so on, for countries all over the world. Run the code below:
Visualizing the Global 2019 GDP Per Capita, Life Expectancy, and other Social Factors Dataset
Below we show how to use data science tools, in particular the Python programming language, to visualize the relationship between economic and social factors. We use this dataset, which features GDP per capita, social support, healthy life expectancy, freedom to make choices, generosity, and so on, for countries all over the world. Run the code below:
- import numpy as np
- import pandas as pd
- import seaborn as sns
- import matplotlib.pyplot as plt
# Dataset: "2019.csv", browsable at
# https://github.com/fati8999-tech/Data-visualization-with-Python-Using-Seaborn-and-Plotly_-GDP-per-Capita-Life-Expectency-Dataset/blob/master/2019.csv
# Read the CSV from the "raw" GitHub content URL rather than the "blob" page linked above.
df = pd.read_csv('https://raw.githubusercontent.com/fati8999-tech/Data-visualization-with-Python-Using-Seaborn-and-Plotly_-GDP-per-Capita-Life-Expectency-Dataset/master/2019.csv')
print(df.head(5))  # quick look at the first five rows

# Configure plotting parameters.
# seaborn is already imported at the top of the script, so it is not imported again here.
# Alternative: plt.style.use('ggplot')
sns.set_style('darkgrid')  # other seaborn styles: whitegrid, dark, white, ticks
plt.rcParams.update({
    'axes.titlesize': 18,    # font size of the axes title
    'axes.labelsize': 14,    # font size of the x and y labels
    'xtick.labelsize': 13,   # font size of the x tick labels
    'ytick.labelsize': 13,   # font size of the y tick labels
    'legend.fontsize': 13,   # legend font size
    'font.size': 13,         # default text size
})

# Colour palettes indexed by the joint plots further down.
# Alternative palette: sns.color_palette("Set2")
colors1 = sns.color_palette('pastel')
colors2 = sns.color_palette('deep')
# Distribution of a single DataFrame column (GDP per capita).
# sns.distplot is deprecated, so sns.histplot is used instead.
# stat="density" with kde=True and kde_kws={"cut": 3} mirrors distplot's
# default output: a density-normalised histogram with a KDE curve on top.
sns.histplot(
    df['GDP per capita'],
    bins=10,  # use 10 bins
    kde=True,
    stat="density",
    kde_kws={"cut": 3},
    color="magenta",
)
plt.show()
plt.clf()

# Same column with 25 bins and no KDE curve; bars show plain counts.
sns.histplot(df['GDP per capita'], bins=25, color="magenta")
plt.show()
# Joint plots: relationship between GDP per capita (x) and healthy life
# expectancy (y), with each variable's own distribution in the margins.
# The same pair of columns is drawn once per (kind, colour) entry below.
joint_plot_variants = [
    ('scatter', "green"),    # jointplot's default kind: scatter plus marginal histograms
    ('reg', colors2[6]),     # regression variant
    ('resid', colors1[5]),   # residual variant
    ('kde', "purple"),       # kernel density estimate variant
    ('hist', "darkblue"),    # histogram variant
    ('hex', "red"),          # data binned into hexagons, histograms in the margins
]
for plot_kind, plot_color in joint_plot_variants:
    sns.jointplot(
        x='GDP per capita',
        y='Healthy life expectancy',
        data=df,
        kind=plot_kind,
        color=plot_color,
    )
    plt.show()
    plt.clf()
# Results show that GDP per capita and healthy life expectancy are
# positively linearly correlated.
# Horizontal bar chart: the ten countries with the highest GDP per capita.
df_sorted = df.sort_values('GDP per capita', ascending=False)
# Take both axes from the same ten-row frame so x and y always have equal
# length, instead of pairing a full column with a ten-row slice.
top10 = df_sorted.head(10)

plt.figure(figsize=(10, 6), tight_layout=True)
sns.barplot(x='GDP per capita', y='Country or region', data=top10, color="darkcyan")
plt.xticks(rotation=90)
plt.title("Top 10 Countries with Highest GDP per Capita")

# Annotate every bar with its value and, further right, its rank (1-10).
for i, v in enumerate(top10['GDP per capita']):
    plt.text(v + 0.01, i, str(round(v, 4)), color='steelblue', va="center")
    plt.text(v + 0.3, i, str(i + 1), color='black', va="center")

# Watermark text; the later bar charts in this script reuse `textstr`.
textstr = 'Created at \nwww.tssfl.com'
# Figure coordinates: (0, 0) is bottom left, (1, 1) is top right.
plt.gcf().text(0.02, 0.9, textstr, fontsize=14, color='green')
plt.show()
plt.clf()
# Vertical bar chart: the ten countries with the highest GDP per capita.
df_sorted = df.sort_values('GDP per capita', ascending=False)
# Take both axes from the same ten-row frame so x and y always have equal
# length, instead of pairing a ten-row slice with a full column.
top10 = df_sorted.head(10)

plt.figure(figsize=(8, 6), tight_layout=True)
sns.barplot(x='Country or region', y='GDP per capita', data=top10, color="darkcyan")
plt.xticks(rotation=90)
plt.title("Top 10 Countries with Highest GDP per Capita")

# Write each value just above its bar, using the tick positions as anchors.
xlocs, _ = plt.xticks()
for xloc, v in zip(xlocs, top10['GDP per capita']):
    plt.text(xloc - 0.25, v + 0.05, str(v), color='steelblue', va="center")

# `textstr` is the watermark string defined earlier in the script.
plt.gcf().text(0.02, 0.1, textstr, fontsize=14, color='green')
plt.show()
plt.clf()
# Horizontal bar chart: the ten countries with the lowest GDP per capita.
df_sorted = df.sort_values('GDP per capita', ascending=True)
# Take both axes from the same ten-row frame so x and y always have equal
# length, instead of pairing a full column with a ten-row slice.
bottom10 = df_sorted.head(10)

plt.figure(figsize=(8, 8), tight_layout=True)
sns.barplot(x='GDP per capita', y='Country or region', data=bottom10, color="darkmagenta")
plt.xticks(rotation=90)
plt.title("Countries with Lowest GDP per Capita")

# Annotate every bar with its value.
for i, v in enumerate(bottom10['GDP per capita']):
    plt.text(v + 0.01, i, str(round(v, 4)), color='teal', va="center")

# `textstr` is the watermark string defined earlier in the script.
plt.gcf().text(0.7, 0.85, textstr, fontsize=14, color='green')
plt.show()
plt.clf()
# Vertical bar chart: the ten countries with the lowest GDP per capita.
df_sorted = df.sort_values('GDP per capita', ascending=True)
# Take both axes from the same ten-row frame so x and y always have equal
# length, instead of pairing a ten-row slice with a full column.
bottom10 = df_sorted.head(10)

plt.figure(figsize=(8, 8), tight_layout=True)
sns.barplot(x='Country or region', y='GDP per capita', data=bottom10, color="darkmagenta")
plt.xticks(rotation=90)
plt.title("Countries with Lowest GDP per Capita")

# Write each value just above its bar, using the tick positions as anchors.
xlocs, _ = plt.xticks()
for xloc, v in zip(xlocs, bottom10['GDP per capita']):
    plt.text(xloc - 0.25, v + 0.01, str(v), color='teal', va="center")

# `textstr` is the watermark string defined earlier in the script.
plt.gcf().text(0.2, 0.85, textstr, fontsize=14, color='green')
plt.show()
plt.clf()
# Horizontal bar chart of GDP per capita for every country in the dataset,
# ordered from lowest to highest.
df_sorted = df.sort_values('GDP per capita', ascending=True)
# Number of plotted rows; used to derive ranks instead of a hard-coded
# constant (the literal 157 - (i + 1) is only correct for 156 rows).
total_countries = len(df_sorted)

plt.figure(figsize=(12, 40), tight_layout=True)
sns.barplot(x='GDP per capita', y='Country or region', data=df_sorted, color="lightblue")
plt.xticks(rotation=90)
plt.title("GDP per Capita")

# Annotate every bar with its value and its rank. Rows are in ascending
# order, so ranks count down and the highest GDP per capita gets rank 1.
for i, v in enumerate(df_sorted['GDP per capita']):
    plt.text(v + 0.01, i, str(round(v, 4)), color='teal', va="center")
    plt.text(v + 0.15, i, str(total_countries - i), color='black', va="center")

# `textstr` is the watermark string defined earlier in the script.
plt.gcf().text(0.55, 0.96, textstr, fontsize=14, color='green')
plt.show()
plt.clf()
# Horizontal bar chart of GDP per capita for every country in the dataset,
# ordered from highest to lowest.
df_sorted = df.sort_values('GDP per capita', ascending=False)

plt.figure(figsize=(12, 40), tight_layout=True)
sns.barplot(
    x='GDP per capita',
    y='Country or region',
    data=df_sorted,
    color="lightblue",
)
plt.xticks(rotation=90)
plt.title("GDP per Capita")

# Annotate every bar with its value and its rank (1 = highest GDP per capita).
for rank, gdp in enumerate(df_sorted['GDP per capita'], start=1):
    bar_row = rank - 1  # bar position along the categorical axis
    plt.text(gdp + 0.01, bar_row, str(round(gdp, 4)), color='teal', va="center")
    plt.text(gdp + 0.15, bar_row, str(rank), color='black', va="center")

# `textstr` is the watermark string defined earlier in the script.
plt.gcf().text(0.02, 0.99, textstr, fontsize=14, color='green')
plt.show()
plt.clf()
#End
# Vertical bar chart of GDP per capita for the last ten rows of `df`, in the
# order they appear in the dataset (`df` itself is never sorted in place).
# Take both axes from the same ten-row frame so x and y always have equal
# length, instead of pairing a ten-row slice with a full column.
last10 = df.tail(10)
plt.figure(figsize=(8, 5), tight_layout=True)
sns.barplot(x='Country or region', y='GDP per capita', data=last10, color="olive")
plt.xticks(rotation=90)
plt.show()
plt.clf()
# Matrix plot: pairwise correlation between a handful of selected columns.
selected_columns = [
    'GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Perceptions of corruption',
]
data_select = df[selected_columns]
correlation_matrix = data_select.corr()  # computed once, printed and plotted below

print("Correlation between Data:")
print(correlation_matrix)

# Visualize the matrix as a heatmap. Other colormaps are listed at
# https://matplotlib.org/tutorials/colors/colormaps.html
plt.figure(figsize=(8, 6), tight_layout=True)
sns.heatmap(correlation_matrix, cmap='coolwarm')
plt.title("Matrix Plot")
plt.show()
plt.clf()
# Overview of the entire dataset: a grid showing the distribution of each
# single variable (histogram) and the relationship between every pair of
# variables (scatter plot).
plt.style.use('ggplot')  # switch to the ggplot style for this figure
pair_grid = sns.pairplot(df)
plt.show()
plt.clf()