当前位置 博文首页 > hallobike的博客:深度学习中常见的图像处理任务

    hallobike的博客:深度学习中常见的图像处理任务

    作者:hallobike 时间:2021-09-14 21:59

    常见图像处理的任务

    1、分类

    给定一幅图像,我们用计算机模型预测图片中有什么对象。

    2、分类与定位

    我们不仅要知道图片中的对象是什么,还要在对象的附近画一个边框,确定该对象所处的位置。

    3、语义分割

    对图中的每一个像素点进行分类,而不仅仅是用矩形框把对象框住。

    4、目标检测

    目标检测简单来说就是回答图片里面有什么?分别在哪里?(并把它们使用矩形框框住)

    5、实例分割

    实例分割是目标检测和语义分割的结合。相对目标检测的边界框,实例分割可精确到物体的边缘;相对语义分割, 实例分割需要标出图上同一物体的不同个体。

    图像定位

    对于单纯的分类问题,比较容易理解:给定一幅图片,我们输出一个标签类别,这一点大家已经很熟悉。

    定位比较复杂,需要输出四个数字(x,y,w,h):边框上某一个点(例如左上角)的坐标(x,y),以及边框的宽度和高度。有了这四个数字,我们可以很容易地找到物体的边框。下文代码使用等价的(xmin,ymin,xmax,ymax)表示,即边框左上角和右下角的坐标。

    Oxford-IIIT数据集

    The Oxford-IIIT Pet Dataset是一个宠物图像数据集,包含37种宠物,每种宠物约有200张图片,该数据集同时包含宠物分类、头部边框(bounding box)标注和语义分割信息。

    先来看看图片定位

    # -*- coding: UTF-8 -*-
    """
    Author: LGD
    FileName: image_position
    DateTime: 2020/12/25 09:45
    SoftWare: PyCharm

    Load one Oxford-IIIT Pet image, read its bounding box from the matching
    annotation XML and draw that box on top of the image.
    """
    import tensorflow as tf
    import matplotlib.pyplot as plt
    from lxml import etree
    import numpy as np
    import glob
    from matplotlib.patches import Rectangle

    print(tf.__version__)
    # tf.test.is_gpu_available() is deprecated in TensorFlow 2.x; listing the
    # physical GPU devices is the documented replacement. bool() keeps the
    # printed value a plain True/False.
    print(bool(tf.config.list_physical_devices('GPU')))

    # Read and decode the sample image, then display it as-is.
    img = tf.io.read_file('images/Abyssinian_1.jpg')
    img = tf.image.decode_jpeg(img)
    print(img.shape)

    plt.imshow(img)
    plt.show()

    # Use a context manager so the annotation file handle is closed promptly.
    with open('annotations/xmls/Abyssinian_1.xml') as xml_file:
        xml = xml_file.read()
    sel = etree.HTML(xml)

    # Image size recorded in the annotation (pixels).
    width = int(sel.xpath('//size/width/text()')[0])
    print(width)
    height = int(sel.xpath('//size/height/text()')[0])

    # Corners of the first bounding box listed in the annotation.
    xmin = int(sel.xpath('//bndbox/xmin/text()')[0])
    xmax = int(sel.xpath('//bndbox/xmax/text()')[0])
    ymin = int(sel.xpath('//bndbox/ymin/text()')[0])
    ymax = int(sel.xpath('//bndbox/ymax/text()')[0])

    print(height, xmin, xmax, ymin, ymax)

    # Rectangle takes an anchor corner plus width and height, hence the
    # differences between the max and min coordinates.
    plt.imshow(img)
    rect = Rectangle((xmin, ymin), (xmax - xmin), (ymax - ymin), fill=False, color='red')
    ax = plt.gca()
    ax.axes.add_patch(rect)
    plt.show()

    

    图片缩放与目标值的规范化

    # Resize the image to a 224x224 square and divide by 255 so the pixel
    # values drop from the 0-255 range to 0-1 for display.
    side = 224
    img = tf.image.resize(img, [side, side]) / 255
    plt.imshow(img)
    plt.show()

    # Rescale the box corners by the same per-axis factors as the image:
    # x coordinates by side / width, y coordinates by side / height.
    xmin, xmax = (x / width * side for x in (xmin, xmax))
    ymin, ymax = (y / height * side for y in (ymin, ymax))

    # Draw the rescaled box over the resized image.
    plt.imshow(img)
    rect = Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, color='red')
    ax = plt.gca()
    ax.add_patch(rect)
    plt.show()

    

    模型训练

    # -*- coding: UTF-8 -*-
    """
    Author: LGD
    FileName: image_position
    DateTime: 2020/12/25 09:45
    SoftWare: PyCharm
    """
    import glob
    import os

    import matplotlib.pyplot as plt
    import numpy as np
    import tensorflow as tf
    from lxml import etree
    from matplotlib.patches import Rectangle
    
    print(tf.__version__)
    # tf.test.is_gpu_available() is deprecated in TensorFlow 2.x; listing the
    # physical GPU devices is the documented replacement. bool() keeps the
    # printed value a plain True/False.
    print(bool(tf.config.list_physical_devices('GPU')))
    
    # img = tf.io.read_file('images/Abyssinian_1.jpg')## img = tf.image.decode_jpeg(img)# print(img.shape)## plt.imshow(img)# plt.show()## xml = open('annotations/xmls/Abyssinian_1.xml').read()# sel = etree.HTML(xml)# width = int(sel.xpath('//size/width/text()')[0])# height = int(sel.xpath('//size/height/text()')[0])## xmin = int(sel.xpath('//bndbox/xmin/text()')[0])# xmax = int(sel.xpath('//bndbox/xmax/text()')[0])# ymin = int(sel.xpath('//bndbox/ymin/text()')[0])# ymax = int(sel.xpath('//bndbox/ymax/text()')[0])## print(height, xmin, xmax, ymin, ymax)## plt.imshow(img)# rect = Rectangle((xmin, ymin), (xmax - xmin), (ymax - ymin), fill=False, color='red')# ax = plt.gca()# ax.axes.add_patch(rect)# plt.show()## img = tf.image.resize(img, [224, 224])# img = img / 255# plt.imshow(img)# plt.show()## xmin = (xmin/width)*224# xmax = (xmax/width)*224# ymin = (ymin/height)*224# ymax = (ymax/height)*224### plt.imshow(img)# rect = Rectangle((xmin, ymin), (xmax - xmin), (ymax - ymin), fill=False, color='red')# ax = plt.gca()# ax.axes.add_patch(rect)# plt.show()
    
    # Build the input pipeline
    def stem_of(path):
        """Return the file name of *path* without its directory or extension.

        os.path is used instead of splitting on a literal backslash so the
        result is the same whichever separator the platform's glob returns.
        """
        return os.path.splitext(os.path.basename(path))[0]


    images = glob.glob('images/*.jpg')
    print(images[:5])
    print(images[-5:])
    print(len(images))

    xmls = glob.glob('annotations/xmls/*.xml')
    print(xmls[:5])
    print(xmls[-5:])
    print(len(xmls))

    # Keep only annotations that have an image, so that after sorting
    # imgs_train[i] and xmls[i] always describe the same sample.
    image_stems = {stem_of(img) for img in images}
    xmls = [x for x in xmls if stem_of(x) in image_stems]

    names = [stem_of(x) for x in xmls]
    print(names, len(names))

    # A set makes each membership test constant-time instead of a list scan.
    annotated = set(names)

    # Images with an annotation are used for training; the rest are kept aside.
    imgs_train = [img for img in images if stem_of(img) in annotated]
    print(imgs_train, len(imgs_train))

    imgs_test = [img for img in images if stem_of(img) not in annotated]

    # Sort both lists by the same key (the bare stem) so they line up pairwise.
    imgs_train.sort(key=stem_of)
    xmls.sort(key=stem_of)

    print(imgs_train[-5:])
    print(xmls[-5:])
    
    
    def to_labels(path):
        """Read one annotation XML and return its box as image-relative fractions.

        Args:
            path: Path of the annotation XML file.

        Returns:
            ``[xmin / width, ymin / height, xmax / width, ymax / height]``,
            built from the first ``size`` and ``bndbox`` entries in the file.

        Raises:
            IndexError: If one of the required elements is missing.
            ValueError: If one of the values is not an integer.
        """
        # Use a context manager so the file handle is closed promptly instead
        # of staying open once per annotation.
        with open(path) as xml_file:
            sel = etree.HTML(xml_file.read())

        def first_int(query):
            # Each query selects text nodes; only the first match is used.
            return int(sel.xpath(query)[0])

        width = first_int('//size/width/text()')
        height = first_int('//size/height/text()')
        xmin = first_int('//bndbox/xmin/text()')
        xmax = first_int('//bndbox/xmax/text()')
        ymin = first_int('//bndbox/ymin/text()')
        ymax = first_int('//bndbox/ymax/text()')

        # Dividing by the image size makes the targets independent of the
        # resolution the image is later resized to.
        return [xmin / width, ymin / height, xmax / width, ymax / height]
    
    
    # Parse every annotation; labels[i] is the box read from xmls[i].
    labels = list(map(to_labels, xmls))

    print(labels[:3])
    # Transpose the per-image boxes into four per-coordinate columns.
    out1, out2, out3, out4 = zip(*labels)
    print(len(out1), len(out2), len(out3), len(out4))

    # One NumPy array per coordinate column.
    out1, out2, out3, out4 = (np.array(column) for column in (out1, out2, out3, out4))

    # Each dataset element is a 4-tuple with one value per coordinate.
    label_datasets = tf.data.Dataset.from_tensor_slices((out1, out2, out3, out4))
    print('label_datasets: ', label_datasets)
    
    
    def load_image(path):
        """Load the JPEG at *path* as a 224x224 three-channel image tensor.

        The file is read, decoded with three channels, resized to 224x224 and
        then divided by 127.5 and shifted by -1, which moves 0-255 pixel
        values into roughly the -1 to 1 range.
        """
        raw = tf.io.read_file(path)
        pixels = tf.image.decode_jpeg(raw, channels=3)
        resized = tf.image.resize(pixels, (224, 224))
        return resized / 127.5 - 1
    
    
    # Turn the image paths into a dataset of decoded, resized image tensors.
    image_dataset = tf.data.Dataset.from_tensor_slices(imgs_train)
    image_dataset = image_dataset.map(load_image)
    print(image_dataset)

    # Pair every image with its four box coordinates.
    dataset = tf.data.Dataset.zip((image_dataset, label_datasets))
    # Shuffle before repeating: each pass over the data is then its own full
    # permutation, instead of samples from neighbouring epochs being mixed
    # inside one shuffle buffer.
    dataset = dataset.shuffle(len(imgs_train)).repeat().batch(32)

    # Sanity check: draw the first sample of one batch with its box.
    for img, label in dataset.take(1):
        plt.imshow(tf.keras.preprocessing.image.array_to_img(img[0]))
        # Labels are fractions of the image size; multiply by 224 to get pixel
        # coordinates in the resized image. Unpacking straight into the corner
        # names avoids overwriting the module-level out1..out4 arrays.
        xmin, ymin, xmax, ymax = (coord[0].numpy() * 224 for coord in label)
        rect = Rectangle((xmin, ymin), (xmax - xmin), (ymax - ymin), fill=False, color='red')
        ax = plt.gca()
        ax.axes.add_patch(rect)
        plt.show()
    
    # Model construction
    input_shape = (224, 224, 3)

    # Xception pre-trained on ImageNet, without its classification head.
    xception = tf.keras.applications.Xception(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape,
    )

    inputs = tf.keras.layers.Input(shape=input_shape)

    # Backbone features -> global average pooling -> two ReLU dense layers.
    x = xception(inputs)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    for units in (2048, 256):
        x = tf.keras.layers.Dense(units, activation='relu')(x)

    # Four separate single-unit heads, one per box coordinate, in the same
    # order as the label tuple.
    out1, out2, out3, out4 = (tf.keras.layers.Dense(1)(x) for _ in range(4))

    prediction = [out1, out2, out3, out4]

    model = tf.keras.models.Model(inputs=inputs, outputs=prediction)

    # Mean squared error as the loss, mean absolute error as the metric.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
        loss='mse',
        metrics=['mae'],
    )
    
    EPOCHS = 50

    # The dataset repeats indefinitely, so the number of batches that make up
    # one epoch has to be given explicitly. max(1, ...) keeps the value valid
    # when fewer than 32 training images are available.
    history = model.fit(
        dataset,
        epochs=EPOCHS,
        steps_per_epoch=max(1, len(imgs_train) // 32),
    )