Commit d3110caa authored by Paul G

init

parent 072a1158
.venv
.venv/Lib/site-packages
%% Cell type:code id: tags:
``` python
import os
import random
# Path to the folder containing the files to delete
folder_path = "D:/Studium/Masterarbeit/Einarbeitung/Codebeispiele/detecting_anomalies/data/cell_images/parasitized"
# List of file names in the folder
file_list = os.listdir(folder_path)
# Number of files to delete
num_files_to_delete = 0
# Randomly select the files to delete
files_to_delete = random.sample(file_list, num_files_to_delete)
# Loop over the selected files and delete them
for file_name in files_to_delete:
    file_path = os.path.join(folder_path, file_name)
    os.remove(file_path)
    print(f"Deleted file {file_name}.")
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
# Source
https://github.com/bnsreenu/python_for_microscopists/blob/master/260_image_anomaly_detection_using_autoencoders/260_image_anomaly_detection_using_autoencoders.py
``Notes``\
Detecting anomalous images using autoencoders (classifying an entire image as either normal or anomalous).\
Here we use both the reconstruction error and a kernel density estimate (KDE) of the vectors in the latent space.
We treat the output of the autoencoder's bottleneck layer as the latent space.\
This code uses the malaria dataset, but it can easily be adapted to other applications.
Data from: https://data.lhncbc.nlm.nih.gov/public/Malaria/cell_images.zip
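To make the two signals concrete before building the full pipeline, here is a minimal toy sketch (made-up arrays, not this notebook's data or encoder): the reconstruction error is a plain per-image MSE, and the latent density comes from a KDE fit on "normal" vectors. Note that scikit-learn's `score_samples` returns the *log* of the density, which is why the density threshold used later can be a large number.
%% Cell type:code id: tags:
``` python
# Toy illustration only: the arrays below are random stand-ins.
import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.default_rng(42)

# Signal 1: reconstruction error (MSE between an input and its reconstruction).
img = rng.random((128, 128, 3))
reconstruction = img + rng.normal(0, 0.01, img.shape)  # pretend autoencoder output
mse = np.mean((img - reconstruction) ** 2)
print("Reconstruction error (MSE):", mse)

# Signal 2: latent density. Fit a KDE on flattened latent vectors of normal
# images, then score a new vector. score_samples returns the LOG density.
normal_latents = rng.random((100, 16))  # stand-in for encoder outputs
kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(normal_latents)
log_density = kde.score_samples(rng.random((1, 16)))[0]
print("Log density of new sample:", log_density)
```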
%% Cell type:code id: tags:
``` python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import random
```
%% Cell type:code id: tags:
``` python
#Size of our input images
SIZE = 128

#Define generators for training, validation and also anomaly data.
batch_size = 64
datagen = ImageDataGenerator(rescale=1./255)

train_generator = datagen.flow_from_directory(
    'data/cell_images/uninfected_train/',
    target_size=(SIZE, SIZE),
    batch_size=batch_size,
    class_mode='input'
)

validation_generator = datagen.flow_from_directory(
    'data/cell_images/uninfected_test/',
    target_size=(SIZE, SIZE),
    batch_size=batch_size,
    class_mode='input'
)

anomaly_generator = datagen.flow_from_directory(
    'data/cell_images/parasitized/',
    target_size=(SIZE, SIZE),
    batch_size=batch_size,
    class_mode='input'
)
```
%% Cell type:code id: tags:
``` python
#Define the autoencoder.
#Try to make the bottleneck layer size as small as possible to make it easy for
#density calculations and also picking appropriate thresholds.
#Encoder
model = Sequential()
model.add(Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(SIZE, SIZE, 3)))
model.add(MaxPooling2D((2, 2), padding='same'))
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2), padding='same'))
model.add(Conv2D(16, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2), padding='same'))
#Decoder
model.add(Conv2D(16, (3, 3), activation='relu', padding='same'))
model.add(UpSampling2D((2, 2)))
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(UpSampling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(UpSampling2D((2, 2)))
model.add(Conv2D(3, (3, 3), activation='sigmoid', padding='same'))
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])
model.summary()
#Fit the model.
history = model.fit(
    train_generator,
    steps_per_epoch=500 // batch_size,
    epochs=1000,
    validation_data=validation_generator,
    validation_steps=75 // batch_size,
    shuffle=True)

#Plot the training and validation loss at each epoch
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'y', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
```
%% Output
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 128, 128, 64) 1792
max_pooling2d (MaxPooling2 (None, 64, 64, 64) 0
D)
conv2d_1 (Conv2D) (None, 64, 64, 32) 18464
max_pooling2d_1 (MaxPoolin (None, 32, 32, 32) 0
g2D)
conv2d_2 (Conv2D) (None, 32, 32, 16) 4624
max_pooling2d_2 (MaxPoolin (None, 16, 16, 16) 0
g2D)
conv2d_3 (Conv2D) (None, 16, 16, 16) 2320
up_sampling2d (UpSampling2 (None, 32, 32, 16) 0
D)
conv2d_4 (Conv2D) (None, 32, 32, 32) 4640
up_sampling2d_1 (UpSamplin (None, 64, 64, 32) 0
g2D)
conv2d_5 (Conv2D) (None, 64, 64, 64) 18496
up_sampling2d_2 (UpSamplin (None, 128, 128, 64) 0
g2D)
conv2d_6 (Conv2D) (None, 128, 128, 3) 1731
=================================================================
Total params: 52067 (203.39 KB)
Trainable params: 52067 (203.39 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
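%% Cell type:markdown id: tags:
The hard-coded `steps_per_epoch=500 // batch_size` assumes roughly 500 training images on disk. A sketch of a more robust alternative, deriving the step counts from the generators themselves (behaviour otherwise unchanged):
%% Cell type:code id: tags:
``` python
# Derive step counts from the generators instead of hard-coding image counts.
# .samples is the number of files the DirectoryIterator found on disk.
steps_per_epoch = max(1, train_generator.samples // batch_size)
validation_steps = max(1, validation_generator.samples // batch_size)
print(f"steps_per_epoch={steps_per_epoch}, validation_steps={validation_steps}")
```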
%% Cell type:code id: tags:
``` python
# Get all batches generated by the datagen and pick a batch for prediction.
# Just to test the model.
data_batch = []  # Capture all training batches as a numpy array
img_num = 0
while img_num <= train_generator.batch_index:  # gets each generated batch of size batch_size
    data = next(train_generator)
    data_batch.append(data[0])
    img_num = img_num + 1

predicted = model.predict(data_batch[0])  # Predict on the first batch of images

# Sanity check: view a few images and their reconstructions
image_number = random.randint(0, predicted.shape[0] - 1)
plt.figure(figsize=(12, 6))
plt.subplot(121)
plt.imshow(data_batch[0][image_number])
plt.subplot(122)
plt.imshow(predicted[image_number])
plt.show()

# Examine the reconstruction error between our validation data (good/normal images)
# and the anomaly images
validation_error = model.evaluate(validation_generator)
anomaly_error = model.evaluate(anomaly_generator)
print("Recon. error for the validation (normal) data is: ", validation_error)
print("Recon. error for the anomaly data is: ", anomaly_error)
```
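%% Cell type:markdown id: tags:
The two aggregate numbers above hide the spread of the per-image errors. Before picking thresholds it can help to look at the full distributions; the sketch below (reusing the model and generators defined earlier) computes a per-image MSE for one normal and one anomaly batch and plots both histograms:
%% Cell type:code id: tags:
``` python
# Per-image reconstruction error for one normal batch and one anomaly batch.
normal_batch = next(validation_generator)[0]
anomaly_batch = next(anomaly_generator)[0]

def per_image_mse(batch):
    recon = model.predict(batch, verbose=0)
    return np.mean((batch - recon) ** 2, axis=(1, 2, 3))

plt.hist(per_image_mse(normal_batch), bins=30, alpha=0.5, label='normal')
plt.hist(per_image_mse(anomaly_batch), bins=30, alpha=0.5, label='anomaly')
plt.xlabel('Reconstruction error (MSE)')
plt.ylabel('Image count')
plt.legend()
plt.show()
```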
%% Cell type:code id: tags:
``` python
#Let us extract (or build) the encoder network, with trained weights.
#This is used to get the compressed output (latent space) of the input image.
#The compressed output is then used to calculate the KDE.
encoder_model = Sequential()
encoder_model.add(Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(SIZE, SIZE, 3), weights=model.layers[0].get_weights()))
encoder_model.add(MaxPooling2D((2, 2), padding='same'))
encoder_model.add(Conv2D(32, (3, 3), activation='relu', padding='same', weights=model.layers[2].get_weights()))
encoder_model.add(MaxPooling2D((2, 2), padding='same'))
encoder_model.add(Conv2D(16, (3, 3), activation='relu', padding='same', weights=model.layers[4].get_weights()))
encoder_model.add(MaxPooling2D((2, 2), padding='same'))
encoder_model.summary()

########################################################
# Calculate KDE using sklearn
from sklearn.neighbors import KernelDensity

#Get the encoded output of the input images = latent space
encoded_images = encoder_model.predict(train_generator)

# Flatten the encoder output because KDE from sklearn takes 1D vectors as input
encoder_output_shape = encoder_model.output_shape  #Here, we have 16x16x16
out_vector_shape = encoder_output_shape[1]*encoder_output_shape[2]*encoder_output_shape[3]
encoded_images_vector = [np.reshape(img, (out_vector_shape)) for img in encoded_images]

#Fit KDE to the image latent data
kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(encoded_images_vector)

#Calculate density and reconstruction error to find their mean values for
#good and anomaly images. We use these means and sigmas to set thresholds.
def calc_density_and_recon_error(batch_images):
    density_list = []
    recon_error_list = []
    for im in range(batch_images.shape[0]):
        img = batch_images[im]
        img = img[np.newaxis, :, :, :]
        encoded_img = encoder_model.predict(img)  # Compressed version of the image from the encoder
        encoded_img = [np.reshape(v, (out_vector_shape)) for v in encoded_img]  # Flatten the compressed image
        density = kde.score_samples(encoded_img)[0]  # Density (log-likelihood) score for the new image
        reconstruction_error = model.evaluate(img, img, batch_size=1, verbose=0)[0]  # MSE between input and reconstruction
        density_list.append(density)
        recon_error_list.append(reconstruction_error)

    average_density = np.mean(np.array(density_list))
    stdev_density = np.std(np.array(density_list))
    average_recon_error = np.mean(np.array(recon_error_list))
    stdev_recon_error = np.std(np.array(recon_error_list))

    return average_density, stdev_density, average_recon_error, stdev_recon_error

#Get average and std dev. of density and recon. error for uninfected and anomaly (parasitized) images.
#For this, generate a batch of images for each.
train_batch = next(train_generator)[0]
anomaly_batch = next(anomaly_generator)[0]

uninfected_values = calc_density_and_recon_error(train_batch)
anomaly_values = calc_density_and_recon_error(anomaly_batch)
```
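%% Cell type:markdown id: tags:
The KDE bandwidth of 0.2 above is a guess. A sketch of a more principled choice using scikit-learn's cross-validated grid search (the candidate grid is arbitrary; `encoded_images_vector` is the flattened latent data from the previous cell):
%% Cell type:code id: tags:
``` python
# Cross-validated bandwidth selection for the KDE.
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

grid = GridSearchCV(
    KernelDensity(kernel='gaussian'),
    {'bandwidth': np.logspace(-2, 1, 10)},  # candidates from 0.01 to 10
    cv=3,
)
grid.fit(np.array(encoded_images_vector))
print("Best bandwidth:", grid.best_params_['bandwidth'])
kde = grid.best_estimator_  # replace the hand-tuned KDE
```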
%% Cell type:code id: tags:
``` python
#Now, input unknown images and classify them as good or anomaly
def check_anomaly(img_path):
    density_threshold = 2500  # Set this value based on the above exercise
    reconstruction_error_threshold = 0.004  # Set this value based on the above exercise
    img = Image.open(img_path).convert('RGB')  # Ensure 3 channels
    img = np.array(img.resize((128, 128), Image.LANCZOS))  # Image.ANTIALIAS was removed in Pillow 10
    plt.imshow(img)
    img = img / 255.
    img = img[np.newaxis, :, :, :]
    encoded_img = encoder_model.predict(img)
    encoded_img = [np.reshape(v, (out_vector_shape)) for v in encoded_img]
    density = kde.score_samples(encoded_img)[0]
    reconstruction_error = model.evaluate(img, img, batch_size=1, verbose=0)[0]
    if density < density_threshold or reconstruction_error > reconstruction_error_threshold:
        print("The image is an anomaly")
    else:
        print("The image is NOT an anomaly")

#Load a couple of test images and verify whether they are reported as anomalies.
import glob
para_file_paths = glob.glob('cell_images2/parasitized/images/*')
uninfected_file_paths = glob.glob('cell_images2/uninfected_train/images/*')

#Anomaly image verification
num = random.randint(0, len(para_file_paths) - 1)
check_anomaly(para_file_paths[num])

#Good/normal image verification
num = random.randint(0, len(uninfected_file_paths) - 1)
check_anomaly(uninfected_file_paths[num])
```
absl-py==1.4.0
asttokens==2.2.1
astunparse==1.6.3
backcall==0.2.0
cachetools==5.3.1
certifi==2023.7.22
charset-normalizer==3.2.0
colorama==0.4.6
comm==0.1.3
contourpy==1.1.0
cycler==0.11.0
debugpy==1.6.7
decorator==5.1.1
executing==1.2.0
flatbuffers==23.5.26
fonttools==4.41.1
gast==0.4.0
google-auth==2.22.0
google-auth-oauthlib==1.0.0
google-pasta==0.2.0
grpcio==1.56.2
h5py==3.9.0
idna==3.4
imageio==2.31.1
ipykernel==6.25.0
ipython==8.14.0
jedi==0.18.2
jupyter_client==8.3.0
jupyter_core==5.3.1
keras==2.13.1
kiwisolver==1.4.4
lazy_loader==0.3
libclang==16.0.6
Markdown==3.4.4
MarkupSafe==2.1.3
matplotlib==3.7.2
matplotlib-inline==0.1.6
nest-asyncio==1.5.6
networkx==3.1
numpy==1.24.3
oauthlib==3.2.2
opencv-python==4.8.0.74
opt-einsum==3.3.0
packaging==23.1
pandas==2.0.3
parso==0.8.3
pickleshare==0.7.5
Pillow==10.0.0
platformdirs==3.9.1
prompt-toolkit==3.0.39
protobuf==4.23.4
psutil==5.9.5
pure-eval==0.2.2
pyasn1==0.5.0
pyasn1-modules==0.3.0
Pygments==2.15.1
pyparsing==3.0.9
python-dateutil==2.8.2
pytz==2023.3
PyWavelets==1.4.1
pywin32==306
pyzmq==25.1.0
requests==2.31.0
requests-oauthlib==1.3.1
rsa==4.9
scikit-image==0.21.0
scipy==1.11.1
six==1.16.0
stack-data==0.6.2
tensorboard==2.13.0
tensorboard-data-server==0.7.1
tensorflow==2.13.0
tensorflow-estimator==2.13.0
tensorflow-intel==2.13.0
tensorflow-io-gcs-filesystem==0.31.0
termcolor==2.3.0
tifffile==2023.7.18
tornado==6.3.2
traitlets==5.9.0
typing_extensions==4.5.0
tzdata==2023.3
urllib3==1.26.16
wcwidth==0.2.6
Werkzeug==2.3.6
wrapt==1.15.0