- import urllib.request
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- urllib.request.urlretrieve("https://www.dropbox.com/scl/fi/54wsp05dmhvefj7mp5z1h/shift-data.xlsx?dl=1&rlkey=la2c42rft7mp33lh9u38eruv6", "shift-data.xlsx")
- urllib.request.urlretrieve("https://www.dropbox.com/scl/fi/7xmbxp3vluks1uux5cbj5/third-shift-data.xlsx?dl=1&rlkey=vgsk9ttpxi8txvd3rjjnekfyv", "third-shift-data.xlsx")
- excel_file_1 = "shift-data.xlsx"
- excel_file_2 = "third-shift-data.xlsx"
- #Read Excel sheets
- sheet1 = pd.read_excel(excel_file_1, sheet_name="first")
- sheet2 = pd.read_excel(excel_file_1, sheet_name="second")
- sheet3 = pd.read_excel(excel_file_2) #There is only one sheet
- #Print data from excel sheet1
- #print(sheet1)
- #Print columns, e.g., with products
- #print(sheet1["Product"])
- #Let's combine all data together
- all_sheets = pd.concat([sheet1, sheet2, sheet3])
- #Print new file
- print(all_sheets) #The number of columns is the same but rows increased, 87 rows X 6 columns
- #Perform some calculations
- pivot = all_sheets.groupby(["Shift"]).mean()
- productivity = pivot.loc[:,"Production Run Time (Min)":"Products Produced (Units)"]
- print(productivity)
- #Plot some graph
- charts = ["bar", "line", "barh", "hist", "box", "kde", "density", "area"]
- for chart_type in charts:
- productivity.plot(kind="%s" % chart_type) #Replace bar with line, barh, hist, box, kde, density, area
- plt.title("%s plot" % chart_type)
- plt.show()
- #Output data to new excel workbook
- all_sheets.to_excel("new_sheet.xlsx")
See attached graphical reports from this sample code.
Data used:
Excel file 1, and excel file 2.