PYTHON import pandas as pd from datetime import date import sys from sklearn.preprocessing import OrdinalEncoder def series_report(     series, is_ordinal=False, is_continuous=False, is_categorical=False ):     print(f"{series.name}: {series.dtype}")     ###### Your code here ######      # Check command line arguments if len(sys.argv) < 2:     print(f"Usage: python3 {sys.argv[0]} <input_file>")     exit(1) # Read in the data df = pd.read_csv(     sys.argv[1], index_col="employee_id" ) # Convert strings to dates for dob and death df['dob'] = df['dob'].apply(lambda x: date.fromisoformat(x)) df['death'] = df['death'].apply(lambda x: date.fromisoformat(x)) # Show the shape of the dataframe (row_count, col_count) = df.shape print(f"*** Basics ***") print(f"Rows: {row_count:,}") print(f"Columns: {col_count}") # Do a report for each column print(f"\n*** Columns ***") series_report(df.index, is_ordinal=True) series_report(df["gender"], is_categorical=True) series_report(df["height"], is_ordinal=True, is_continuous=True) series_report(df["waist"], is_ordinal=True, is_continuous=True) series_report(df["salary"], is_ordinal=True, is_continuous=True) series_report(df["dob"], is_ordinal=True) series_report(df["death"], is_ordinal=True)   When you fill in the missing lines, you will be able to run it like this: python3 make_report.py employees.csv Then, it will print a report like this: *** Basics *** Rows: 10,000 Columns: 6 *** Columns *** employee_id: int64 Range: 1712 - 9998838 gender: object Missing in 82 rows (0.8%) 4917: m 4907: f 36: F 23: M 19: male 16: female height: float64 Range: 1.34 - 2.07 Mean: 1.71 Standard deviation: 0.11 Median: 1.71 waist: float64 Range: 0.47 - 2.18 Mean: 1.21 Standard deviation: 0.23 Median: 1.19 salary: float64 Missing in 70 rows (0.7%) Range: 297.0 - 140902.0 Mean: 63033.98 Standard deviation: 20093.83 Median: 63078.50 dob: object Range: 1945-01-01 - 1984-12-21 death: object Range: 1960-03-20 - 2022-06-12

Microsoft Visual C#
7th Edition
ISBN:9781337102100
Author: Joyce Farrell
Publisher: Cengage Learning
Chapter 3: Using GUI Objects and the Visual Studio IDE
Section: Chapter Questions
Problem 12RQ: A(n) _____________ is generated when a user interacts with a GUI object. a. error b. occasion c....
icon
Related questions
Question

PYTHON

import pandas as pd
from datetime import date
import sys

from sklearn.preprocessing import OrdinalEncoder

def series_report(
    series, is_ordinal=False, is_continuous=False, is_categorical=False
):
    """Print a short descriptive report for one column (or index).

    The header line ``<name>: <dtype>`` is always printed. If any values
    are missing, a ``Missing in N rows (P%)`` line follows. The flags then
    select further sections, printed in this order:

    Args:
        series: A pandas ``Series`` or ``Index`` to describe.
        is_ordinal: Values can be ordered; print ``Range: <min> - <max>``
            computed over the non-missing values.
        is_continuous: Values are numeric; print the mean, sample standard
            deviation and median, each rounded to two decimals.
        is_categorical: Values are labels; print ``<count>: <value>`` for
            every distinct non-missing value, most frequent first.
    """
    print(f"{series.name}: {series.dtype}")

    # Only mention missing data when there is some, as in the sample report.
    missing_count = int(series.isna().sum())
    if missing_count > 0:
        print(
            f"Missing in {missing_count} rows "
            f"({missing_count / len(series):.1%})"
        )

    # Drop missing values first: min()/max() on an object column that mixes
    # real values with NaN would otherwise fail on the comparison.
    present = series.dropna()

    # An empty or all-missing column has no range to report.
    if is_ordinal and len(present) > 0:
        print(f"Range: {present.min()} - {present.max()}")

    if is_continuous:
        print(f"Mean: {present.mean():.2f}")
        print(f"Standard deviation: {present.std():.2f}")
        print(f"Median: {present.median():.2f}")

    if is_categorical:
        # value_counts() excludes missing values and sorts by frequency.
        for value, count in present.value_counts().items():
            print(f"{count}: {value}")
    
# Check command line arguments: the CSV path is required.
if len(sys.argv) < 2:
    print(f"Usage: python3 {sys.argv[0]} <input_file>")
    # Use sys.exit rather than the bare exit() helper: exit() is injected by
    # the site module for interactive sessions and is not guaranteed to exist.
    sys.exit(1)

# Read in the data, using the employee_id column as the row index
df = pd.read_csv(sys.argv[1], index_col="employee_id")

# Convert ISO-format date strings to datetime.date objects for dob and death
for date_column in ("dob", "death"):
    df[date_column] = df[date_column].apply(date.fromisoformat)

# Show the shape of the dataframe
row_count, col_count = df.shape
print("*** Basics ***")
print(f"Rows: {row_count:,}")
print(f"Columns: {col_count}")

# Do a report for each column; the flags tell series_report which
# statistics make sense for that column.
print("\n*** Columns ***")
series_report(df.index, is_ordinal=True)
series_report(df["gender"], is_categorical=True)
series_report(df["height"], is_ordinal=True, is_continuous=True)
series_report(df["waist"], is_ordinal=True, is_continuous=True)
series_report(df["salary"], is_ordinal=True, is_continuous=True)
series_report(df["dob"], is_ordinal=True)
series_report(df["death"], is_ordinal=True)

 

When you fill in the missing lines, you will be able to run
it like this:
python3 make_report.py employees.csv
Then, it will print a report like this:
*** Basics ***
Rows: 10,000
Columns: 6
*** Columns ***
employee_id: int64
Range: 1712 - 9998838
gender: object
Missing in 82 rows (0.8%)
4917: m
4907: f
36: F
23: M
19: male
16: female
height: float64
Range: 1.34 - 2.07
Mean: 1.71
Standard deviation: 0.11
Median: 1.71
waist: float64
Range: 0.47 - 2.18
Mean: 1.21
Standard deviation: 0.23
Median: 1.19
salary: float64
Missing in 70 rows (0.7%)
Range: 297.0 - 140902.0
Mean: 63033.98
Standard deviation: 20093.83
Median: 63078.50
dob: object
Range: 1945-01-01 - 1984-12-21
death: object
Range: 1960-03-20 - 2022-06-12 

Expert Solution
trending now

Trending now

This is a popular solution!

steps

Step by step

Solved in 3 steps

Blurred answer
Knowledge Booster
Unreferenced Objects
Learn more about
Need a deep-dive on the concept behind this application? Look no further. Learn more about this topic, computer-science and related others by exploring similar questions and additional content below.
Similar questions
  • SEE MORE QUESTIONS
Recommended textbooks for you
Microsoft Visual C#
Microsoft Visual C#
Computer Science
ISBN:
9781337102100
Author:
Joyce, Farrell.
Publisher:
Cengage Learning,