【機器學習實踐】Kaggle 之 Face Verification Challenge練手
來自: http://blog.csdn.net//chenriwei2/article/details/50321627
導言
這是一個 InClass 的比賽,主要任務是:給定由1000多張圖片兩兩組合而成的901153對圖像,要求判斷每一對是否是同一個人。
作為第一次做Kaggle的比賽,來練練手還是不錯的。
由于在這所有的二元組中,數據是極其不平衡的,所以即使我們把所有的組合都判斷為1(不同的人),精度也會達到99%以上,因此在這里單單評測識別率是沒有意義的。
官方這邊給出的評價方法是AUC,也就是以ROC曲線之下的面積作為衡量的標準。
步驟
數據下載
官方給定的數據分為訓練數據和測試數據。由于我這邊采用的是無監督的方法,所以訓練數據並不重要,可以不采用。
處理
訓練數據和測試數據都有給人臉框位置和關鍵點位置,然而,它具體的方法沒有給定,所以還是無法采用,最終的方法是,我采用自己的那一套人臉檢測和對齊方法,對于自己的方法沒有檢測和對齊到的圖像,采用官方提供的版本代替。
代碼如下:
# -*- coding: utf-8 -*-
'''
@brief: One-to-one face comparison; assumes the faces have already been
        aligned in a uniform way beforehand.
@author: Riwei Chen <riwei.chen@outlook.com>
'''
import matplotlib.pyplot as plt
import numpy as np
import skimage
# Imported explicitly because read_image calls skimage.io.imread; a bare
# `import skimage` is not guaranteed to load the io submodule.
import skimage.io
import sys
import os
import glob
import numpy.linalg as LA

# The second assignment overrides the first; the last value is the one used.
caffe_root = '/home/crw/caffe-master/'
caffe_root = '/media/crw/MyBook/Caffe/caffe-triplet/'
# Make the pycaffe bindings of the chosen Caffe tree importable.
sys.path.insert(0, caffe_root + 'python')
import caffe
import sklearn
import sklearn.metrics.pairwise as pw
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score,roc_auc_score
from skimage import transform as tf

caffe.set_mode_cpu()
# Per-channel mean of the training data
averageImg = [129.1863,104.7624,93.5940]
# Data used throughout the script, kept in module-level globals
#=====================================
# NOTE(review): in the published text the alternative values below stood on
# their own lines without a leading '#', exactly like the comment lines and
# the '====' separators whose '#' was stripped by the page extraction. They
# are restored here as commented-out alternatives; confirm against the
# author's original script before relying on either set of values.
metric='cosine'
model_define='model_maxout/deploy.prototxt'
model_weight='model_maxout/small_maxout2__iter_1360000.caffemodel'
#model_weight='/media/crw/MyBook/Model/FaceRecognition/Softmax/try6_7/small_maxout100x100__iter_1400000.caffemodel'
feature_layer='eltwise10'
#feature_layer='l2_norm'
image_formats =['jpg','png','bmp']
feature_len = 256
data_w = 128
data_h = 128
#feature_len = 128
#data_w = 256
#data_h = 256
data_as_gray = True
sub_mean = False
scale = 1
#scale =255
# Network used by get_image_feature; built once at import time.
net = caffe.Classifier(model_define, model_weight)
#====================================
def read_image(filename,w=128,h=128,as_grey=False):
    '''
    @brief: Read one image file and return it as a 4-D array.
    @param: filename: path of the image to read
    @param: w,h: size the image is resized to
    @param: as_grey: when True the image is read as a single grey channel,
            otherwise three colour channels are filled
    @return: array of shape (1,1,w,h) for grey input, (1,3,w,h) otherwise;
             pixel values are the resized image multiplied by the global
             `scale`
    '''
    n_channels = 1 if as_grey == True else 3
    X = np.empty((1, n_channels, w, h))

    pixels = skimage.io.imread(filename, as_grey=as_grey)
    pixels = tf.resize(pixels, (w, h)) * scale

    if as_grey == True:
        X[0, 0, :, :] = pixels[:, :]
        return X

    # Mind the channel-order consistency: source channel 0/1/2 is stored at
    # index 2/1/0 (presumably RGB -> BGR for the Caffe model; confirm).
    for src in range(3):
        plane = pixels[:, :, src]
        if sub_mean == True:
            # Per-channel mean is only subtracted when sub_mean is enabled.
            plane = plane - averageImg[src]
        X[0, 2 - src, :, :] = plane
    return X
def get_image_feature(filename):
    '''
    @brief: Extract the face feature of one image file.
    @param: filename: path of the image file
    @return: feature: float64 array of shape (1, feature_len), taken from
             the network output named by the global `feature_layer`
    '''
    X=read_image(filename,w=data_w,h=data_h,as_grey=data_as_gray)
    out = net.forward_all(data=X)
    feature = np.float64(out[feature_layer])
    feature=np.reshape(feature,(1,feature_len))
    return feature


def consia_distance(feature1, feature2):
    '''
    @brief: Cosine similarity of two vectors, rounded to 2 decimals and
            mapped linearly from [-1, 1] to [0, 1].
    @param: feature1, feature2: vectors of equal length; (1, N) arrays are
            flattened before use
    @return: 0.5 + 0.5 * cos(feature1, feature2) as a Python float
             (1.0 = same direction, 0.5 = orthogonal, 0.0 = opposite)
    '''
    # Flatten so that the dot product is a scalar rather than a (1, 1) array;
    # round() on an array is not portable across Python/numpy versions.
    a = np.ravel(feature1)
    b = np.ravel(feature2)
    consia = round(float(np.dot(a, b) / (LA.norm(a) * LA.norm(b))), 2)
    return 0.5 + 0.5 * consia


def evaluate_by_distance(feature1,feature2):
    '''
    @brief: Score a pair of extracted features with the global `metric`.
    @param: feature1: feature of the first image
    @param: feature2: feature of the second image
    @return: for metric == 'cosine' the value of consia_distance (larger
             means more similar); for any other metric the [0][0] entry of
             sklearn's pairwise_distances (a distance).
             NOTE(review): the two branches are oriented differently;
             confirm which orientation the caller expects.
    '''
    if metric == 'cosine':
        return consia_distance(feature1,feature2)
    mt=pw.pairwise_distances(feature1, feature2, metric)
    return mt[0][0]


image_formats = ['jpg','png']
# Cache of already extracted features, keyed by image path, so that an image
# appearing in many pairs is only run through the network once.
feature_dict = dict()


def _cached_feature(path):
    '''Return the feature of `path`, extracting it only on first use.'''
    if path not in feature_dict:
        feature_dict[path] = get_image_feature(path)
    return feature_dict[path]


def evaluate_kaggle_test(filepath,filename,resultfile='submit.csv'):
    '''
    @brief: Score every image pair of the Kaggle test set and write the
            submission file.
    @param: filepath: directory containing the test images
    @param: filename: comma-separated pairs file; the first line is skipped
            as a header, column 0 is the pair id, columns 1 and 2 are the
            two image file names relative to `filepath`
    @param: resultfile: output CSV with header "Id,Prediction"; each
            prediction is the pair score min-max normalised over all pairs
    '''
    with open(filename) as fid:
        fid.readline()  # skip the header line
        # Blank lines (e.g. a trailing newline) carry no pair and are ignored.
        rows = [line.split(',') for line in fid if line.strip()]

    result = np.zeros((len(rows),))
    for i, word in enumerate(rows):
        feature1 = _cached_feature(os.path.join(filepath, word[1].strip()))
        feature2 = _cached_feature(os.path.join(filepath, word[2].strip()))
        result[i] = evaluate_by_distance(feature1, feature2)

    with open(resultfile, 'w') as fid:
        fid.write("Id,Prediction"+'\n')
        if not rows:
            # Nothing to score: leave a header-only file instead of failing
            # on max/min of an empty array.
            return
        d_max = np.max(result)
        d_min = np.min(result)
        # Single-argument form prints "max min" on both Python 2 and 3.
        print("%s %s" % (d_max, d_min))
        span = d_max - d_min
        if span == 0:
            # All scores identical: avoid 0/0 and emit 0.0 for every pair.
            span = 1.0
        for word, score in zip(rows, result):
            fid.write(word[0]+','+str((score-d_min)/span)+'\n')


if __name__ == '__main__':
    filepath = '/media/crw/MyBook/TestData/kaggle_Face_verification_challenge/train_dlib'
    #evaluate_kaggle_train(filepath)
    filepath = '/media/crw/MyBook/TestData/kaggle_Face_verification_challenge/test_dlib_crop'
    filename = '/media/crw/MyBook/TestData/kaggle_Face_verification_challenge/pairs.csv'
    resultfile='submission.csv'
    evaluate_kaggle_test(filepath,filename,resultfile)
結果
第一次上傳的時候,排名第一:
最終的結果:
</div>