Assignment 22 - Solutions.pdf
keyboard_arrow_up
School
Northeastern University *
*We aren’t endorsed by this school
Course
6400
Subject
Industrial Engineering
Date
Apr 3, 2024
Type
Pages
33
Uploaded by CaptainSparrowMaster1000
12/13/23, 8:10 PM
Assignment 22 - Jupyter Notebook
localhost:8888/notebooks/IE6400 Foundations for Data Analytics Engineering/Assignments/Assignment 22.ipynb
1/33
Assignment 22:
In [1]:
WARNING:tensorflow:From C:\Users\LaxmanRao\anaconda3\Lib\site-packages\keras\src\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.
C:\Users\LaxmanRao\anaconda3\Lib\site-packages\fuzzywuzzy\fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
  warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
import
os
import
matplotlib.pyplot as
plt
from
PIL import
Image
import
pandas as
pd
import
numpy as
np
import
tensorflow as
tf
from
tensorflow.keras.applications import
MobileNetV2, MobileNetV3Small
import
pickle
import
hnswlib
from
fuzzywuzzy import
process
from
typing import
List
import
time
12/13/23, 8:10 PM
Assignment 22 - Jupyter Notebook
localhost:8888/notebooks/IE6400 Foundations for Data Analytics Engineering/Assignments/Assignment 22.ipynb
2/33
Question 1:
Data Collection and Preprocessing:
12/13/23, 8:10 PM
Assignment 22 - Jupyter Notebook
localhost:8888/notebooks/IE6400 Foundations for Data Analytics Engineering/Assignments/Assignment 22.ipynb
3/33
In [2]:
# Set the path to the directory where the dataset is stored
dataset_directory = './Textures-Dataset/train'

# Initialize a counter for the number of classes processed
num_classes = 0

# Initialize a figure for plotting
plt.figure(figsize=(15, 10))

# Display the first image of each class, for at most five classes.
# NOTE(review): the grid is 2 rows x 5 columns but only the first row is
# filled, and only the original image is drawn -- no rotated/flipped copy.
# Confirm whether a second orientation was meant to occupy the second row.
# Entries are sorted because os.listdir returns names in arbitrary order,
# which would otherwise make the displayed classes platform-dependent.
for class_name in sorted(os.listdir(dataset_directory)):
    class_dir = os.path.join(dataset_directory, class_name)

    # Only class folders are of interest; skip stray files
    if not os.path.isdir(class_dir):
        continue

    # An empty class folder has no sample to show; skip it instead of
    # failing with IndexError on the [0] lookup
    image_names = sorted(os.listdir(class_dir))
    if not image_names:
        continue

    # Get the first image in the directory
    image_name = image_names[0]
    image_path = os.path.join(class_dir, image_name)

    # Increment the class counter; it doubles as the 1-based subplot index
    num_classes += 1

    # Add a subplot for the original image
    plt.subplot(2, 5, num_classes)

    # Load the image; the context manager releases the file handle once
    # imshow has read the pixel data
    with Image.open(image_path) as image:
        plt.imshow(image)
    plt.title(class_name)
    plt.axis('off')

    # Check if we have processed 5 classes
    if num_classes == 5:
        break

# Adjust subplot parameters for a nicer layout
plt.tight_layout()
plt.show()
12/13/23, 8:10 PM
Assignment 22 - Jupyter Notebook
localhost:8888/notebooks/IE6400 Foundations for Data Analytics Engineering/Assignments/Assignment 22.ipynb
4/33
In [3]:
In [4]:
In [5]:
# Root folders of the training and validation splits of the textures dataset
train_dataset_path = './Textures-Dataset/train'
validation_dataset_path = './Textures-Dataset/valid'
def create_metadata_df(dataset_directory: str) -> pd.DataFrame:
    """Index a dataset stored as one sub-folder per category.

    Every regular file found directly inside a category folder becomes one
    row of the result.

    Args:
        dataset_directory: Path of the folder whose immediate sub-folders
            are the categories.

    Returns:
        A DataFrame with the columns ``image_id`` (file name),
        ``image_path`` (``dataset_directory`` joined with the category and
        file name) and ``category`` (name of the containing sub-folder).
        Rows are ordered by category name, then by file name. The frame is
        empty, but keeps the three columns, when no files are found.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``dataset_directory``
            cannot be listed (e.g. it does not exist).
    """
    ids, image_paths, image_categories = [], [], []

    # os.listdir returns names in arbitrary order; sorting keeps the row
    # order reproducible across platforms and runs.
    for category in sorted(os.listdir(dataset_directory)):
        category_dir = os.path.join(dataset_directory, category)

        # Files sitting next to the category folders are not categories
        if not os.path.isdir(category_dir):
            continue

        for image_name in sorted(os.listdir(category_dir)):
            image_path = os.path.join(category_dir, image_name)

            # Keep regular files only: nested folders (for example notebook
            # checkpoint directories) and broken links are not images
            if not os.path.isfile(image_path):
                continue

            ids.append(image_name)
            image_paths.append(image_path)
            image_categories.append(category)

    return pd.DataFrame({
        'image_id': ids,
        'image_path': image_paths,
        'category': image_categories,
    })
# Build one metadata table per split from the two dataset folders
train_metadata_df = create_metadata_df(train_dataset_path)
validation_metadata_df = create_metadata_df(validation_dataset_path)
12/13/23, 8:10 PM
Assignment 22 - Jupyter Notebook
localhost:8888/notebooks/IE6400 Foundations for Data Analytics Engineering/Assignments/Assignment 22.ipynb
5/33
In [6]:
In [7]:
In [8]:
In [9]:
Out[6]:
image_id
image_path
class
0
1.jpg
./Textures-Dataset/train\KTH_aluminium_foil\1.jpg
KTH_aluminium_foil
1
101.jpg
./Textures-Dataset/train\KTH_aluminium_foil\10...
KTH_aluminium_foil
2
103.jpg
./Textures-Dataset/train\KTH_aluminium_foil\10...
KTH_aluminium_foil
3
105.jpg
./Textures-Dataset/train\KTH_aluminium_foil\10...
KTH_aluminium_foil
4
107.jpg
./Textures-Dataset/train\KTH_aluminium_foil\10...
KTH_aluminium_foil
Out[7]:
image_id
image_path
class
0
0.jpg
./Textures-Dataset/valid\KTH_aluminium_foil\0.jpg
KTH_aluminium_foil
1
10.jpg
./Textures-Dataset/valid\KTH_aluminium_foil\10...
KTH_aluminium_foil
2
100.jpg
./Textures-Dataset/valid\KTH_aluminium_foil\10...
KTH_aluminium_foil
3
102.jpg
./Textures-Dataset/valid\KTH_aluminium_foil\10...
KTH_aluminium_foil
4
104.jpg
./Textures-Dataset/valid\KTH_aluminium_foil\10...
KTH_aluminium_foil
Number of Training Images: 4335
Number of Validation Images: 4340
Number of Unique Categories: 64
# Preview the first rows of each metadata table (in the notebook each call
# ends its own cell, which is what makes the table render)
train_metadata_df.head()
validation_metadata_df.head()

# Report the size of each split and the number of distinct training categories
print(f'Number of Training Images: {len(train_metadata_df)}')
print(f'Number of Validation Images: {len(validation_metadata_df)}')
print(f'Number of Unique Categories: {train_metadata_df["category"].nunique()}')
categories_freq =
train_metadata_df[
'category'
].value_counts().reset_index(name
=
'frequency'
).sort_values(
'fre
12/13/23, 8:10 PM
Assignment 22 - Jupyter Notebook
localhost:8888/notebooks/IE6400 Foundations for Data Analytics Engineering/Assignments/Assignment 22.ipynb
6/33
In [10]:
# Horizontal bar chart of how many training samples each category contains
plt.figure(figsize=(10, 13))
plt.title('Samples Per Class')
plt.barh(categories_freq['category'], categories_freq['frequency'])
plt.xlabel('Number of Samples Per Class')
plt.ylabel('Class')
plt.show()
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help