Object Detection

object detection project - keras를 사용한 detection(1)

jwjwvison 2021. 8. 24. 22:54

 앞선 포스팅에서는 yolo를 사용해 물체를 detect 했다. 이번에는 직접 케라스를 이용해서 CNN을 통해 image들을 학습시킨 후, image 안에 존재하는 객체를 ROI로 영역을 나누어서 객체 탐지를 해보겠다.

 

 먼저 필요한 모듈들을 import 하고, 파일 경로를 설정해준다.

import tensorflow as tf
from tensorflow import keras

import numpy as np
import matplotlib.pyplot as plt
import math
import os
import cv2

# --- Training hyperparameters ---
batch_size = 128
num_classes = 0  # starts at zero; counted up while classes.txt is read
epochs = 100

# --- Network input geometry: crop width, crop height, channel depth ---
CW, CH, CD = 32, 32, 3

# --- Saved model file name ---
model_name = 'fruit_custom.h5'
# tflite_model_name='fruit_custom.tfile'

# --- Dataset locations (path looks like a Colab Google Drive mount) ---
COLAB_DARKNET_PATH = '/content/drive/MyDrive/darknet'
YOLO_IMAGE_PATH = f'{COLAB_DARKNET_PATH}/custom/'
YOLO_FORMAT_PATH = f'{COLAB_DARKNET_PATH}/custom/'

# --- Containers that are filled in by later cells ---
classes = []
train_images, train_labels = [], []
test_images, test_labels = [], []

# --- Single sample used for a manual test ---
sample_test_image = 'sample_test.jpg'
sample_test_label = 0

 

 이미지를 보여주는 함수, 다운로드 해주는 함수, ROI를 설정하는 함수를 정의한다.

# Helper function to display sample images in a grid
def show_sample(images,labels,sample_count=25):
  """Plot up to ``sample_count`` images in a square grid, captioned by label.

  Args:
    images: indexable collection of BGR images accepted by ``cv2.cvtColor``.
    labels: indexable collection of captions, parallel to ``images``.
    sample_count: maximum number of images to draw.

  Returns:
    None. Nothing is drawn when there is nothing to show.
  """
  # Never index past the data that is actually available.
  sample_count=min(sample_count,len(images),len(labels))
  if sample_count <= 0:
    return

  # Side of the smallest square grid that can hold every sample.
  grid_count=math.ceil(math.sqrt(sample_count))

  plt.figure(figsize=(2*grid_count , 2*grid_count))
  for i in range(sample_count):
    plt.subplot(grid_count,grid_count,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    # OpenCV stores channels as BGR; matplotlib expects RGB.
    image=cv2.cvtColor(images[i],cv2.COLOR_BGR2RGB)
    # No cmap: matplotlib ignores it for RGB data.
    plt.imshow(image)
    plt.xlabel(labels[i])
  plt.show()

# Offer a file for download when running inside Colab
def download(path):
  """Download ``path`` through ``google.colab.files`` when it is importable.

  If the Colab module cannot be imported, print the absolute location
  (current working directory joined with ``path``) instead.
  """
  try:
    from google.colab import files as colab_files
    colab_files.download(path)
  except ImportError:
    import os
    local_path=os.path.join(os.getcwd(),path)
    print('Error download', local_path)

# Get photo ROI coordinates from YOLO format
def getROI(size,box):
  """Convert one YOLO annotation into pixel bounds clamped to the image.

  Args:
    size: (height, width) of the image in pixels.
    box: sequence [class, center_x, center_y, w, h]; the last four entries
      are fractions of the image size and may be numeric strings.

  Returns:
    (startY, endY, startX, endX), usable as ``img[startY:endY, startX:endX]``.
  """
  height=size[0]
  width=size[1]

  # Convert relative coordinates to absolute pixel coordinates.
  x=float(box[1]) * width    #centerX
  y=float(box[2]) * height   #centerY
  half_width=float(box[3]) * width / 2.0
  half_height=float(box[4]) * height / 2.0

  # Clamp to the image: a negative start would wrap around in NumPy slicing
  # and silently yield an empty or wrong crop.
  startX=max(0,int(x-half_width))
  startY=max(0,int(y-half_height))
  endX=min(width,int(x+half_width))
  endY=min(height,int(y+half_height))
  return (startY,endY,startX,endX)

def imShow(path):
  """Display a BGR image with matplotlib, axes hidden.

  Despite its name, ``path`` is handed straight to ``cv2.cvtColor``, so it
  must be image data rather than a file path.
  """
  plt.gcf()
  # fig.set_size_inches(18,10)
  plt.axis('off')
  # OpenCV stores channels as BGR; matplotlib expects RGB.
  rgb_image=cv2.cvtColor(path,cv2.COLOR_BGR2RGB)
  plt.imshow(rgb_image)
  plt.show()

 

 classes.txt 파일을 열어 어떤 클래스가 있는지 확인해본다.

# Rebuild the class list from classes.txt (one class name per line).
# Clearing first keeps the cell safe to re-run without duplicating entries.
classes.clear()
with open(YOLO_FORMAT_PATH + 'classes.txt', 'r') as txt:
  for line in txt:
    # strip() also removes '\r' and stray spaces; blank lines are not classes.
    name=line.strip()
    if name:
      classes.append(name)
# Derive the count from the list so the two can never disagree.
num_classes=len(classes)
print(classes,num_classes)

 

 train data와 test data를 각각 이미지 리스트와 레이블 리스트로 나누고, ROI 좌표를 구해 사진 안에 있는 객체 부분만 잘라낸다. 그다음 잘라낸 이미지의 크기를 (32,32)로 축소시켜준다.

def _load_yolo_crops(list_path):
  """Crop and resize every annotated object listed in a YOLO image list.

  Args:
    list_path: text file with one image path per line. Each image's
      annotations are read from a ``.txt`` file with the same base name.

  Returns:
    (crops, class_ids): a list of (CH, CW) BGR crops and the parallel list
    of integer class ids.

  Raises:
    FileNotFoundError: if a listed image cannot be read by OpenCV, or its
      annotation file does not exist.
  """
  crops=[]
  class_ids=[]
  with open(list_path,'r') as list_file:
    for entry in list_file:
      image_path=entry.strip()
      if not image_path:
        continue   # tolerate blank lines in the list file

      img=cv2.imread(image_path)
      if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Cannot read image: %s' % image_path)
      size=img.shape[:2]

      # splitext copes with any extension length (.jpg, .jpeg, ...).
      text_path=os.path.splitext(image_path)[0] + '.txt'
      with open(text_path,'r') as label_file:
        for label_line in label_file:
          box=label_line.split()   # [class,center_x,center_y,w,h ]
          if not box:
            continue   # tolerate blank lines in the annotation file

          (startY,endY,startX,endX) = getROI(size,box)
          roi=img[startY:endY,startX:endX]
          if roi.size == 0:
            # cv2.resize cannot handle an empty crop; report and move on.
            print('Skipping empty ROI in %s: %s' % (text_path,label_line.strip()))
            continue

          # Colour matters for tangerines and apples, so the crop is not
          # converted to grayscale.
          # Resize to (CW, CH); INTER_AREA is a good choice when shrinking.
          crops.append(cv2.resize(roi,(CW,CH),interpolation=cv2.INTER_AREA))
          class_ids.append(int(box[0]))
  return crops,class_ids


# ---- training split ----
train_images,train_labels=_load_yolo_crops(YOLO_FORMAT_PATH + 'train.txt')
image_count=len(train_images)
# convert to Numpy arrays and scale pixel data to the range [0,1]
train_images=np.array(train_images).astype('float32') / 255.0
# one-hot encode the training labels
train_labels=keras.utils.to_categorical(np.array(train_labels),num_classes)
print('%d images added' % image_count)

# ---- test split ----
test_images,test_labels=_load_yolo_crops(YOLO_FORMAT_PATH + 'test.txt')
image_count=len(test_images)
# convert to Numpy arrays and scale pixel data to the range [0,1]
test_images=np.array(test_images).astype('float32') / 255.0
# one-hot encode the testing labels
test_labels=keras.utils.to_categorical(np.array(test_labels),num_classes)
print('%d images added' % image_count)

 

 training data에 있는 데이터들을 확인해보자.

# Preview at most 25 crops from the training set, captioned by class name
show_number=len(train_images)
print('Total number of images: %d' % show_number)
show_number=min(show_number,25)

# Labels are one-hot rows, so argmax recovers the class index.
sample_names=['%s' % classes[np.argmax(one_hot)] for one_hot in train_labels]
show_sample(train_images,sample_names,show_number)

 

 이제 CNN을 만들어준다.

# Build, compile and train the CNN classifier
inputShape=(CH,CW,CD)

model=keras.Sequential()
# Feature extractor: two 3x3 convolutions followed by 2x2 max pooling.
model.add(keras.layers.Conv2D(filters=32,kernel_size=(3,3),activation=tf.nn.relu,input_shape=inputShape))
model.add(keras.layers.Conv2D(filters=64,kernel_size=(3,3),activation=tf.nn.relu))
model.add(keras.layers.MaxPooling2D((2,2)))
model.add(keras.layers.Dropout(0.25))
# Classifier head: one hidden dense layer, then a softmax over the classes.
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128,activation=tf.nn.relu))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(num_classes,activation=tf.nn.softmax))

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer='adam',
    metrics=['accuracy'],
)

# Stop once val_loss has not improved for 20 consecutive epochs.
earlyStopping=tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=20)
# Write the model to model_name, keeping only the best checkpoint.
modelCheckpoint=tf.keras.callbacks.ModelCheckpoint(model_name,save_best_only=True)

history=model.fit(
    x=train_images,
    y=train_labels,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(test_images,test_labels),
    callbacks=[earlyStopping,modelCheckpoint],
)

 

# Plot training and validation loss per epoch
for curve in ('loss','val_loss'):
  plt.plot(history.history[curve])
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train','val'])
plt.show()