이미지를 이용하여 객체 인식 후 소리내기

개발일지/ML(Machine Learning),DL(Deep Learning)

이미지를 이용하여 객체 인식 후 소리내기

shinyfood 2024. 1. 11. 23:39

728x90

이번 게시물에서는 이미지를 이용하여 객체를 인식하고, 인식된 객체가 맞는지 아닌지 인식이 됐다면, 맞는사람에 한해 소리를 내는 모델을 만들어보도록 하겠습니다.

여전히 임포트부터 시작합니다.

import os
import numpy as np
import pandas as pd
#OpenCV 라이브러리
import cv2
#이미지 증식 라이브러리 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
#이미지를 numpy array(배열)로 변환하는 라이브러리
from tensorflow.keras.preprocessing.image import img_to_array

# 특정 원본 이미지 폴더 내의 모든 이미지 파일을 읽어들여서
#  - 이미지 픽셀 데이터로 변환해서 리턴하는 함수 정의 
def load_images(directory) :
    # 각 이미지 별 변환 데이터를 저장할 리스트 변수
    images = []
    
    # 특정 디렉토리의 모든 파일 읽어들이기
    for filename in os.listdir(directory) :
        
        #특정 확장자를 가지는 파일만 읽어들이기
        if filename.endswith((".jpg", ".jpeg", ".png")) :
            #파일명 추출
            img_path = os.path.join(directory, filename)

            # 이미지를 3차원 데이터로 변환하기
            img = cv2.imread(img_path)

            #이미지 데이터의 픽셀 크기를 너비 200, 높이 220
            img = cv2.resize(img, (200,220))

            #변환된 이미지 데이터를 리스트 변수에 담기
            images.append(img)
            
    # numpy 배열로 변환하여 리턴
    return np.array(images)

이미지를 받아서 이미지를 3차원 데이터로 변경한뒤, numpy배열로 리턴하는 함수입니다.

###특정 폴더 내의 모든 이미지파일 로드하여, 이미지 데이터로 변환하는 함수 호출
# 폴더 위치 지정
input_directory = "./data/01_org_img/"

# 함수호출
images = load_images(input_directory)
images.shape

(10, 220, 200, 3)

이렇게 변환되어 나옵니다.

읽어들인 모든 원본 이미지 각각에 대해서 이미지 증식 및 종속변수(라벨) 정의 함수

"""
 - images : 원본 전체에 대한 각 이미지 데이터
 - output_directory : 증식한 이미지를 저장할 폴더 위치
 - target_filename : 종속변수 데이터를 저장할 (폴더+파일명)
"""

def create_images(images, output_directory, target_filename) :
    # 종속 변수 데이터를 담을 리스트 변수
    target = []

    # 원본 이미지 10개에 대한 라벨링 기준
    # - 1은 소리가 날 인자, 0은 소리가 안날 인자
    target_list = [1, 0, 1, 0, 1, 1, 0, 1, 1, 0]

    ############### 이미지 증식 객체 생성하기 ###################
    datagen = ImageDataGenerator(
                    rescale = 1./255,
                    rotation_range=15,
                    width_shift_range=0.1,
                    height_shift_range=0.1,
                    shear_range=0.5,
                    zoom_range=[0.8, 2.0],
                    horizontal_flip=True,
                    vertical_flip=True,
                    fill_mode="nearest"
    )
    ############### 이미지 증식 시키기 ##################
    
    # - i   : images의 인덱스 번호
    # - img : 각 인덱스 별 이미지 데이터
    for i, img in enumerate(images) :
        print(f"============[{i+1}/{len(images)}]번째 증식 중===========")

        ### 3차원 이미지 데이터를 4차원으로 변환하기
        img = img.reshape((1,) + img.shape)

        ### 증식된 이미지를 저장할 파일명 정의
        save_prefix = "train_img_" + str(i)

        ### 각 이미지 별 생성(증식)할 이미지의 개수 정의
        create_images_cnt = 20

        ######### 이미지 증식 및 파일 저장 시키기 #########
        for batch in datagen.flow(
                                    img,
                                    save_to_dir = output_directory,
                                    save_prefix = save_prefix,
                                    save_format = "png"
                                ) :
            ### 증식된 이미지 별로 [종속변수]로 사용할 값을 리스트에 담기
            target.append(target_list[i])

            ### 생성할 이미지 개수만큼 만들어지면 반복 종료시키기
            create_images_cnt -= 1
            if create_images_cnt == 0:
                break

    ### 종속변수 리스트를 numpy 배열 타입으로 변환하기
    target = np.array(target)

    ### 종속변수 리스트를 numpy 파일로 저장시키기
    # - target_filename : 저장할 폴더 + 파일명
    # - target : 종속변수 리스트
    
    np.save(target_filename, target)

이렇게 만들어놓고 함수를 호출하면,

"""이미지 증식 및 파일 저장, 종속변수 생성 및 파일 저장 함수 호출"""
### 증식된 이미지 파일 저장 폴더
output_directory = "./data/02_train_img/"

### 종속변수 데이터 파일명
# - numpy 파일의 확장자는 보통 .npy를 사용합니다.
target_filename = "./data/03_train_4d_data/target_data.npy"

### 함수 호출하기
          # = 을사용하면 파라미터 순서가 달라도 사용가능.(target_filename=target_filename)
create_images(images, output_directory, target_filename)

============[1/10]번째 증식 중===========
============[2/10]번째 증식 중===========
============[3/10]번째 증식 중===========
============[4/10]번째 증식 중===========
============[5/10]번째 증식 중===========
============[6/10]번째 증식 중===========
============[7/10]번째 증식 중===========
============[8/10]번째 증식 중===========
============[9/10]번째 증식 중===========
============[10/10]번째 증식 중===========

이런식으로 증식되는 숫자만큼 나온다.

def save_4d_data(output_directory, save_4d_filename) :
    ### 4차원 데이터를 담을 리스트 변수
    data = []
    
    ### 증식된 모든 파일 읽어들이기
    for filename in os.listdir(output_directory) :
        if filename.endswith((".jpg", ".jpeg", ".png")) :
            img_path = os.path.join(output_directory, filename)

            ### 3차원 데이터로 읽어들이기
            img = cv2.imread(img_path)

            # print(img)
            
            ### 픽셀 정규화
            img = cv2.resize(img, (200,220))

            ### 리스트에 담기
            data.append(img)
            
    data = np.array(data)
    print(f"최종처리결과 : {data.shape}")
    
    np.save(save_4d_filename, data)

이렇게 데이터를 세이브하고

"""증식된 이미지 각각에 대한 이미지 데이터로 변환하여 numpy 배열 파일로 저장하기"""
save_4d_filename = "./data/03_train_4d_data/train_4d_data.npy"

save_4d_data(output_directory, save_4d_filename)
최종처리결과 : (200, 220, 200, 3)

함수를 호출하면 이렇게 나온다.

4차원 이미지 데이터와 종속 변수 데이터를 읽어들여서 CNN 훈련시키기

다시 임포트

### 사용할 라이브러리 다시 정의
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

# 독립변수 종속변수 불러오기
image_data = np.load("./data/03_train_4d_data/train_4d_data.npy")
label_data = np.load("./data/03_train_4d_data/target_data.npy")

# 정규화
X = image_data/ 255.0

X_train, X_test, y_train, y_test = train_test_split(X, label_data, 
	test_size=0.2, random_state=42)

print(f"{X_train.shape}/{y_train.shape}")
print(f"{X_test.shape}/{y_test.shape}")

(160, 220, 200, 3)/(160,)
(40, 220, 200, 3)/(40,)

모델 훈련을 위해 전처리를해주고, 모델을 생성해보도록 하자.

# 모델 생성
model = tf.keras.Sequential()

# 입력계층
model.add(tf.keras.layers.Conv2D(kernel_size=3, filters=32, activation="relu", padding="same", strides=1, input_shape=(220,200, 3)))
# 은닉계층
model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
model.add(tf.keras.layers.Conv2D(kernel_size=3, filters=32, activation="relu", padding="same", strides=2))
model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(50,activation="relu"))
model.add(tf.keras.layers.Dropout(0.2))
# 출력계층
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))
model.summary()

계층 생성 후, 확인

이후 모델을 훈련한다.

history = model.fit(X_train, y_train, epochs=15, verbose=2, batch_size=32,
          validation_data=(X_test, y_test))

import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(5,3))
plt.title("Loss")
plt.plot(history.epoch, history.history["loss"])
plt.plot(history.epoch, history.history["val_loss"])
plt.grid()
plt.show()

plt.figure(figsize=(5,3))
plt.title("accuracy")
plt.plot(history.epoch, history.history["accuracy"])
plt.plot(history.epoch, history.history["val_accuracy"])
plt.grid()
plt.show()

CNN 모델을 이용해서 위험한 사람 확인 및 비프음 발생시키기

"""사용 라이브러리"""
import os
import platform
### 윈도우 OS인 경우에만 사용(mac은 제외)
import winsound
import time

pred_img = cv2.imread("./data/04_detection_img/0_1.png")

# pred_img = cv2.resize(predimg,(200,220))
pred_img.shape
pred_img = pred_img.reshape((-1,) + pred_img.shape)
# pred_img
pred_data = pred_img / 255.0
pred_data

array([[[[0.62352941, 0.6745098 , 0.72156863],
         [0.62352941, 0.6745098 , 0.72156863],
         [0.62352941, 0.6745098 , 0.72156863],
         ...,
		 [0.79215686, 0.76078431, 0.76078431],
         [0.79215686, 0.76078431, 0.76078431],
         [0.79215686, 0.76078431, 0.76078431]]]])

y_pred = model.predict(pred_data)

""" 확률이 50%이상인 경우 위험인자로 인식해서 비프음 발생하기"""
if y_pred[0] > 0.5 :
    print(f"인식률[{y_pred[0]}] : 원하는 분이셔요")
    ### 비프음 발생시키기
    # - 윈도우 OS인 경우
    if platform.system() == "Windows" :
        ### 1000 : 소리 주파수(ㅅ소리 높낮이 조정값)
        # - 보통 사람의 청각 범위 : 20Hz~20,000HZ 정도
        ### 500 : wlthrtlrks (1000은 1초, 500은 0.5초)
        for i in range(5) :
            #Beep 대신 여러가지를 넣으면 여러가지 소리가 난다.
            winsound.Beep(1000,500)   
            time.sleep(0.5)
    ### MAC인 OS인 경우
    elif platform.system() == "Darwin" : 
        os.system("osascript -e 'beep'")
else :
    print(f"인식률[{y_pred[0]}] : 소리가안나용")

이런식으로하면소리가 난다.

728x90