Assignment 22 - Solutions

.pdf

School

Northeastern University *

*We aren’t endorsed by this school

Course

6400

Subject

Industrial Engineering

Date

Apr 3, 2024

Type

pdf

Pages

33

Uploaded by CaptainSparrowMaster1000

Report
12/13/23, 8:10 PM Assignment 22 - Jupyter Notebook localhost:8888/notebooks/IE6400 Foundations for Data Analytics Engineering/Assignments/Assignment 22.ipynb 1/33 Assignment 22: In [1]: WARNING:tensorflow:From C:\Users\LaxmanRao\anaconda3\Lib\site-packages\keras\src\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead. C:\Users\LaxmanRao\anaconda3\Lib\site-packages\fuzzywuzzy\fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning') import os import matplotlib.pyplot as plt from PIL import Image import pandas as pd import numpy as np import tensorflow as tf from tensorflow.keras.applications import MobileNetV2, MobileNetV3Small import pickle import hnswlib from fuzzywuzzy import process from typing import List import time
12/13/23, 8:10 PM Assignment 22 - Jupyter Notebook localhost:8888/notebooks/IE6400 Foundations for Data Analytics Engineering/Assignments/Assignment 22.ipynb 2/33 Question 1: Data Collection and Preprocessing:
# 12/13/23, 8:10 PM Assignment 22 - Jupyter Notebook localhost:8888/notebooks/IE6400 Foundations for Data Analytics Engineering/Assignments/Assignment 22.ipynb 3/33 In [2]:
# Preview the dataset: show the first image of up to five class folders.

# Set the path to the directory where the dataset is stored
dataset_directory = './Textures-Dataset/train'

# Initialize a counter for the number of classes processed
num_classes = 0

# Initialize a figure for plotting
plt.figure(figsize=(15, 10))

# Display one image from each class. os.listdir() returns entries in
# arbitrary order, so sort to make the preview reproducible across machines.
for class_name in sorted(os.listdir(dataset_directory)):
    class_dir = os.path.join(dataset_directory, class_name)

    # Skip stray files that sit next to the class folders
    if not os.path.isdir(class_dir):
        continue

    # Skip empty class folders instead of failing with IndexError on [0]
    image_names = sorted(os.listdir(class_dir))
    if not image_names:
        continue

    # Get the first image in the directory
    image_name = image_names[0]
    image_path = os.path.join(class_dir, image_name)

    # Increment the class counter
    num_classes += 1

    # Add a subplot for the image; only the first row of the 2 x 5 grid
    # is filled because the loop stops after five classes.
    plt.subplot(2, 5, num_classes)

    # Open inside a context manager so the file handle is released as soon
    # as the pixels have been handed to matplotlib.
    with Image.open(image_path) as image:
        plt.imshow(image)
    plt.title(class_name)
    plt.axis('off')

    # Check if we have processed 5 classes
    if num_classes == 5:
        break

# Adjust subplot parameters for a nicer layout
plt.tight_layout()
plt.show()
# 12/13/23, 8:10 PM Assignment 22 - Jupyter Notebook localhost:8888/notebooks/IE6400 Foundations for Data Analytics Engineering/Assignments/Assignment 22.ipynb 4/33 In [3]: In [4]: In [5]:
train_dataset_path = './Textures-Dataset/train'
validation_dataset_path = './Textures-Dataset/valid'


def create_metadata_df(dataset_directory: str) -> pd.DataFrame:
    """Build a metadata table for a dataset laid out as one folder per category.

    Args:
        dataset_directory: Directory whose immediate sub-directories are the
            categories; the files inside each sub-directory are the images.

    Returns:
        A DataFrame with one row per image file and the columns
        ``image_id`` (the file name), ``image_path`` (``dataset_directory``
        joined with the category and file name) and ``category`` (the name
        of the containing sub-directory). Rows are ordered by category name
        and then by file name.

    Raises:
        OSError: If ``dataset_directory`` cannot be listed (for example
            because it does not exist).
    """
    ids, image_paths, image_categories = [], [], []

    # os.listdir() order is arbitrary; sorting keeps the row order stable.
    for category in sorted(os.listdir(dataset_directory)):
        category_dir = os.path.join(dataset_directory, category)
        if not os.path.isdir(category_dir):
            continue

        for image_name in sorted(os.listdir(category_dir)):
            image_path = os.path.join(category_dir, image_name)
            # Only regular files are images; a nested directory would pass
            # an exists() check and end up in the table as a bogus row.
            if not os.path.isfile(image_path):
                continue
            image_categories.append(category)
            image_paths.append(image_path)
            ids.append(image_name)

    return pd.DataFrame({
        'image_id': ids,
        'image_path': image_paths,
        'category': image_categories,
    })


train_metadata_df = create_metadata_df(train_dataset_path)
validation_metadata_df = create_metadata_df(validation_dataset_path)
12/13/23, 8:10 PM Assignment 22 - Jupyter Notebook localhost:8888/notebooks/IE6400 Foundations for Data Analytics Engineering/Assignments/Assignment 22.ipynb 5/33 In [6]: In [7]: In [8]: In [9]: Out[6]: image_id image_path class 0 1.jpg ./Textures-Dataset/train\KTH_aluminium_foil\1.jpg KTH_aluminium_foil 1 101.jpg ./Textures-Dataset/train\KTH_aluminium_foil\10... KTH_aluminium_foil 2 103.jpg ./Textures-Dataset/train\KTH_aluminium_foil\10... KTH_aluminium_foil 3 105.jpg ./Textures-Dataset/train\KTH_aluminium_foil\10... KTH_aluminium_foil 4 107.jpg ./Textures-Dataset/train\KTH_aluminium_foil\10... KTH_aluminium_foil Out[7]: image_id image_path class 0 0.jpg ./Textures-Dataset/valid\KTH_aluminium_foil\0.jpg KTH_aluminium_foil 1 10.jpg ./Textures-Dataset/valid\KTH_aluminium_foil\10... KTH_aluminium_foil 2 100.jpg ./Textures-Dataset/valid\KTH_aluminium_foil\10... KTH_aluminium_foil 3 102.jpg ./Textures-Dataset/valid\KTH_aluminium_foil\10... KTH_aluminium_foil 4 104.jpg ./Textures-Dataset/valid\KTH_aluminium_foil\10... KTH_aluminium_foil Number of Training Images: 4335 Number of Validation Images: 4340 Number of Unique Categories: 64 train_metadata_df.head() validation_metadata_df.head() print ( f'Number of Training Images: { len (train_metadata_df)} ' ) print ( f'Number of Validation Images: { len (validation_metadata_df)} ' ) print ( f'Number of Unique Categories: {train_metadata_df[ "category" ].nunique()} ' ) categories_freq = train_metadata_df[ 'category' ].value_counts().reset_index(name = 'frequency' ).sort_values( 'fre
# 12/13/23, 8:10 PM Assignment 22 - Jupyter Notebook localhost:8888/notebooks/IE6400 Foundations for Data Analytics Engineering/Assignments/Assignment 22.ipynb 6/33 In [10]:
# Horizontal bar chart: one bar per category, bar length = its frequency.
fig, ax = plt.subplots(figsize=(10, 13))
ax.set_title('Samples Per Class')
ax.barh(categories_freq['category'], categories_freq['frequency'])
ax.set_xlabel('Number of Samples Per Class')
ax.set_ylabel('Class')
plt.show()
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help