In Class Assignment - DA 621.py
keyboard_arrow_up
School
University of Nevada, Las Vegas *
*We aren’t endorsed by this school
Course
621
Subject
Industrial Engineering
Date
Jan 9, 2024
Type
py
Pages
2
Uploaded by AgentFog22428
import numpy as np # linear algebra
import statistics
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
#to ignore warning
import warnings
warnings.filterwarnings('ignore')
# Load the cleaned flight-price dataset from the author's local machine.
# The path literal must stay on a single line: a quoted string cannot span
# a physical line break.
flight_data = pd.read_csv(
    '/Users/stevenleng/Desktop/DA 621 Milestone Project/archive/Clean_Dataset.csv'
)

# Quick structural overview of the raw table.
print(flight_data.columns)   # column labels
print(flight_data.shape)     # (number of rows, number of columns)
print(flight_data.head(30))  # first 30 rows of the dataset
flight_data.info()           # variables (columns) of the dataset

# Remove the 'Unnamed: 0' variable, assuming it has no predictive power
# for the dependent variable.
flight_data = flight_data.drop(['Unnamed: 0'], axis=1)
flight_data.info()           # variables that remain after the drop

print(flight_data.describe())          # summary statistics (centrality and spread)
print(flight_data.isnull().sum())      # check for null values
print(flight_data.duplicated().sum())  # check for duplicated rows
# NOTE: despite its name, flight_price_mean holds the whole 'price' column;
# the mean itself is computed below and stored in `mean`.
flight_price_mean = flight_data['price']
print(flight_price_mean)

# Arithmetic mean: add up every price, then divide by the number of prices.
total = sum(flight_price_mean)
mean = total / len(flight_price_mean)
print(f'hand written mean: {mean}')
def calculate_median(numbers):
    """Return the median of ``numbers``, or ``None`` when there are none.

    Args:
        numbers: Iterable of real numbers. Containers and one-shot
            iterables (e.g. generators) are both accepted.

    Returns:
        The value computed by ``statistics.median``, or ``None`` if
        ``numbers`` yields no items.
    """
    # Materialise once so iterables without len() (e.g. generators) work.
    values = list(numbers)
    if len(values) == 0:
        return None  # empty input: nothing to take the median of
    return statistics.median(values)
# Median price: pass the 'price' column to calculate_median and report it.
# NOTE: the label says "hand written", but calculate_median delegates to
# statistics.median.
numbers = flight_data["price"]
median = calculate_median(numbers)
print(f'hand written median: {median}')
def calculate_mode(numbers):
    """Return the most common value in ``numbers``, or ``None`` if empty.

    Args:
        numbers: Iterable of hashable values. Containers and one-shot
            iterables (e.g. generators) are both accepted.

    Returns:
        The value computed by ``statistics.mode``, or ``None`` if
        ``numbers`` yields no items. Tie handling is whatever
        ``statistics.mode`` does on the running Python version.
    """
    # Materialise once so iterables without len() (e.g. generators) work.
    values = list(numbers)
    if len(values) == 0:
        return None  # empty input: there is no mode
    return statistics.mode(values)
# Example usage: mode of the 'price' column.
# NOTE: the label says "hand written", but calculate_mode delegates to
# statistics.mode.
numbers = flight_data["price"]
mode = calculate_mode(numbers)
print(f'hand written mode: {mode}')
def variance(flight_data, group=2):
    """Return the sample variance (``n - 1`` denominator) of numeric data.

    Args:
        flight_data: Iterable of numbers, e.g. the ``price`` column. Despite
            the parameter name this must be one-dimensional numeric data,
            not the whole DataFrame.
        group: Unused. Kept only so existing calls that pass it keep working.

    Returns:
        The sum of squared deviations from the mean divided by ``n - 1``.

    Raises:
        ValueError: If fewer than two values are supplied, since the
            ``n - 1`` denominator would be zero or negative.
    """
    values = list(flight_data)
    count = len(values)
    if count < 2:
        raise ValueError('variance requires at least two data points')

    # Compute the mean here: the module-level name `mean` is bound to a
    # float by the earlier mean calculation, so it cannot be called.
    avg = sum(values) / count

    # Sum of squared deviations from the mean.
    total = sum((value - avg) ** 2 for value in values)

    # Sample variance: divide by n - 1 rather than n.
    return total / (count - 1)


# Call the function on the price column; interpolating the bare name
# `variance` would print the function object instead of a number.
print(f'hand written variance: {variance(flight_data["price"])}')
# Bar chart of the number of flights per month, each bar labelled with its count.
# NOTE(review): assumes flight_data has a 'Journey_month' column; nothing in
# the visible code creates it - confirm.
plt.figure(figsize=(10, 5))
plt.title('Count of flights month wise')
ax = sns.countplot(data=flight_data, x='Journey_month')
plt.xlabel('Month')
plt.ylabel('Count of flights')

# Write each bar's height just above it.
for p in ax.patches:
    bar_height = p.get_height()
    label_position = (p.get_x() + 0.25, bar_height + 1)
    ax.annotate(int(bar_height), label_position, va='bottom', color='black')
Your preview ends here
Eager to read the complete document? Join Bartleby Learn and gain access to the full version.
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help