CIFAR-10とは
機械学習の勉強用に画像が必要になりググってみたところ、
CIFAR-10の画像データセットが良さそうでした。
解像度は32×32と小さいけれど、犬や猫など10種類の画像が各6000枚(合計60000枚)あり、圧倒的な量です。
Windowsでデータ利用したかったので、
バイナリ提供されているものを画像ファイルに変換することにしました。
データ形式
・解像度:32 x 32
・画像の種類は10種類あります。アメリカンなチョイスですね。
airplane:飛行機
automobile:自動車
bird:鳥
cat:猫
deer:鹿
dog:犬
frog:カエル
horse:馬
ship:船
truck:トラック
・各種類ごとに6000枚。
トレーニング用画像が5000枚。
テスト用画像が1000枚
・バイナリ構造は以下のとおりです。
1画像 = ラベル1バイト + R 1024バイト + G 1024バイト + B 1024バイト(計3073バイト)で、このレコードが画像の枚数分だけ連続して格納されています。
画像バイナリファイルの入手方法
・サイト:The CIFAR-10 dataset
Download先:CIFAR-10 binary version (suitable for C programs)
ダウンロードしたら解凍展開します。
・展開後のフォルダ構成は以下のとおりです。
cifar-10-batches-bin
batches.meta.txt ラベル名
data_batch_1.bin トレーニング画像の1
data_batch_2.bin トレーニング画像の2
data_batch_3.bin トレーニング画像の3
data_batch_4.bin トレーニング画像の4
data_batch_5.bin トレーニング画像の5
test_batch.bin テスト画像
変換ツールの作成
・それでは本題です。Visual Studioを起動して、WPFアプリのプロジェクトを作成します。
・NuGetで「System.Drawing.Common」をインストールしてください。
・MainWindow.xamlとMainWindow.xaml.csを編集してください。
// MainWindow.xaml.cs
using Microsoft.Win32;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;
using System.Drawing;
using System.Runtime.InteropServices;
using System.Threading; // BackgroundWorker
using System.ComponentModel; // BackgroundWorker

namespace cifar10
{
    /// <summary>
    /// Interaction logic for MainWindow.xaml.
    /// Runs the CIFAR-10 conversion on a <see cref="BackgroundWorker"/> and
    /// mirrors its progress in the progress bar and message text box.
    /// </summary>
    public partial class MainWindow : Window
    {
        /// <summary>
        /// Number of progress steps: one per input file
        /// (5 training batches + 1 test batch).
        /// </summary>
        private const int ProgressStepCount = 6;

        private BackgroundWorker backgroundWorker1;

        /// <summary>
        /// Builds the window and wires the background worker's events.
        /// </summary>
        public MainWindow()
        {
            InitializeComponent();

            this.progressBar.Minimum = 0;
            this.progressBar.Maximum = ProgressStepCount;

            this.backgroundWorker1 = new BackgroundWorker();
            // Must be enabled before the worker calls ReportProgress.
            this.backgroundWorker1.WorkerReportsProgress = true;
            this.backgroundWorker1.DoWork += this.func_execute;
            this.backgroundWorker1.RunWorkerCompleted += this.func_end;
            this.backgroundWorker1.ProgressChanged += this.func_progress_change;
        }

        /// <summary>
        /// Starts a conversion run. Clicks are ignored while a run is active,
        /// because RunWorkerAsync throws if the worker is already busy.
        /// </summary>
        private void button_Click(object sender, RoutedEventArgs e)
        {
            if (this.backgroundWorker1.IsBusy)
            {
                return;
            }

            this.progressBar.Value = 0;
            this.textMessage.Text = "Start!\r\n";
            this.backgroundWorker1.RunWorkerAsync();
        }

        /// <summary>
        /// Called on the UI thread when the worker finishes.
        /// Reports a failure instead of claiming success when the worker threw.
        /// </summary>
        private void func_end(object sender, RunWorkerCompletedEventArgs e)
        {
            if (e.Error != null)
            {
                this.textMessage.Text += $"Error: {e.Error.Message}\r\n";
                return;
            }

            this.progressBar.Value = ProgressStepCount;
            this.textMessage.Text += "Complete!\r\n";
        }

        /// <summary>
        /// Worker thread entry point: creates the converter and runs it.
        /// </summary>
        private void func_execute(object sender, DoWorkEventArgs e)
        {
            Cifar10FileConvertor cifar = new Cifar10FileConvertor();
            cifar.Execute(this.backgroundWorker1);
        }

        /// <summary>
        /// Copies the worker's reported progress value into the progress bar.
        /// </summary>
        public void func_progress_change(object sender, ProgressChangedEventArgs e)
        {
            this.progressBar.Value = e.ProgressPercentage;
        }
    }

    /// <summary>
    /// Cifar-10 File To Image File Convertor.
    /// Reads the CIFAR-10 binary batches and writes one PNG per record to
    /// "{write dir}/{data|test}/{label name}/{NNNN}.png".
    /// All paths are relative, so place the cifar-10-batches-bin folder in the
    /// folder the exe is run from before executing.
    /// </summary>
    public class Cifar10FileConvertor
    {
        /// <summary>
        /// Width and height of one image in pixels.
        /// </summary>
        private const int ImageSide = 32;

        /// <summary>
        /// Bytes per colour plane in one record (32 x 32).
        /// </summary>
        private const int PlaneSize = ImageSide * ImageSide;

        /// <summary>
        /// Bytes of interleaved pixel data per image (3 channels).
        /// </summary>
        private const int PixelBytes = PlaneSize * 3;

        /// <summary>
        /// Folder name used for records whose label id has no entry in the label file.
        /// </summary>
        private const string OtherLabelName = "other";

        /// <summary>
        /// Base Write Dir
        /// </summary>
        private readonly string Cifar10WriteDir = @"./work10";

        /// <summary>
        /// Base Read Dir
        /// </summary>
        private readonly string Cifar10ReadDir = @"./cifar-10-batches-bin";

        /// <summary>
        /// Training Write Dir
        /// </summary>
        private readonly string Cifar10WriteTrainingDir = "data";

        /// <summary>
        /// Testing Write Dir
        /// </summary>
        private readonly string Cifar10WriteTestingDir = "test";

        /// <summary>
        /// Cifar-10 Training Image Data File (Read)
        /// </summary>
        private readonly string[] Cifar10DataFileName =
        {
            "data_batch_1.bin",
            "data_batch_2.bin",
            "data_batch_3.bin",
            "data_batch_4.bin",
            "data_batch_5.bin",
        };

        /// <summary>
        /// Cifar-10 Testing Image Data File (Read)
        /// </summary>
        private readonly string[] Cifar10TestFileName =
        {
            "test_batch.bin",
        };

        /// <summary>
        /// Cifar-10 Class Name File (Read)
        /// </summary>
        private readonly string Cifar10ClassFileName = "batches.meta.txt";

        /// <summary>
        /// Cifar-10 Label Name (Write). Index = label id.
        /// </summary>
        private string[] Cifar10LabelName = null;

        /// <summary>
        /// Files created so far per label for the training set.
        /// Has one extra trailing slot for the "other" label.
        /// </summary>
        private int[] Cifar10DataCounter = null;

        /// <summary>
        /// Files created so far per label for the test set.
        /// Has one extra trailing slot for the "other" label.
        /// </summary>
        private int[] Cifar10TestCounter = null;

        /// <summary>
        /// Loads the label names, creates the output folder tree and resets the counters.
        /// </summary>
        /// <exception cref="FileNotFoundException">
        /// The label name file does not exist; nothing can be converted without it.
        /// </exception>
        public Cifar10FileConvertor()
        {
            // Class Name
            string read_file_path = $"{this.Cifar10ReadDir}/{this.Cifar10ClassFileName}";
            if (!File.Exists(read_file_path))
            {
                Console.WriteLine($"{read_file_path} : read file none!");
                // Fail here with a clear message rather than later with a null-reference style error.
                throw new FileNotFoundException($"{read_file_path} : read file none!", read_file_path);
            }

            // Trim so that a CRLF file does not leave '\r' inside directory names.
            this.Cifar10LabelName = File.ReadAllText(read_file_path, Encoding.ASCII)
                .Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(name => name.Trim())
                .Where(name => name.Length > 0)
                .ToArray();

            // Write Dir Create (CreateDirectory is a no-op for existing folders
            // and creates missing parents).
            string training_dir = $"{this.Cifar10WriteDir}/{this.Cifar10WriteTrainingDir}";
            string testing_dir = $"{this.Cifar10WriteDir}/{this.Cifar10WriteTestingDir}";
            Directory.CreateDirectory(training_dir);
            Directory.CreateDirectory(testing_dir);
            foreach (string label_dir in this.Cifar10LabelName)
            {
                Directory.CreateDirectory($"{training_dir}/{label_dir}");
                Directory.CreateDirectory($"{testing_dir}/{label_dir}");
            }

            // CreateFile Counter clear (new arrays are zero-initialised).
            this.Cifar10DataCounter = new int[this.Cifar10LabelName.Length + 1];
            this.Cifar10TestCounter = new int[this.Cifar10LabelName.Length + 1];
        }

        /// <summary>
        /// Converts every training and test batch file, reporting the number of
        /// files processed so far after each one, then prints per-label totals.
        /// </summary>
        /// <param name="worker">
        /// Worker used for progress reporting. May be null or have progress
        /// reporting disabled, in which case no progress is reported.
        /// </param>
        public void Execute(BackgroundWorker worker)
        {
            int total = 0;

            foreach (string file_name in this.Cifar10DataFileName)
            {
                this.ExecuteFile($"{this.Cifar10ReadDir}/{file_name}", this.Cifar10WriteDir, this.Cifar10WriteTrainingDir);
                total++;
                ReportProgress(worker, total);
            }

            foreach (string file_name in this.Cifar10TestFileName)
            {
                this.ExecuteFile($"{this.Cifar10ReadDir}/{file_name}", this.Cifar10WriteDir, this.Cifar10WriteTestingDir);
                total++;
                ReportProgress(worker, total);
            }

            this.WriteSummary("Data", this.Cifar10DataCounter);
            this.WriteSummary("Test", this.Cifar10TestCounter);
        }

        /// <summary>
        /// Reports progress only when the worker can accept it;
        /// ReportProgress throws when WorkerReportsProgress is false.
        /// </summary>
        private static void ReportProgress(BackgroundWorker worker, int value)
        {
            if (worker != null && worker.WorkerReportsProgress)
            {
                worker.ReportProgress(value);
            }
        }

        /// <summary>
        /// Prints the per-label and total file counts for one data set.
        /// The trailing "other" slot is printed only when it was used.
        /// </summary>
        private void WriteSummary(string kind, int[] counters)
        {
            int sum = 0;
            for (int i = 0; i < counters.Length; i++)
            {
                bool is_other_slot = i == this.Cifar10LabelName.Length;
                if (is_other_slot && counters[i] == 0)
                {
                    continue;
                }

                Console.WriteLine($"{kind} File Index:{i}, Total:{counters[i]}");
                sum += counters[i];
            }

            Console.WriteLine($"{kind} File Total:{sum}");
        }

        /// <summary>
        /// File Open and Execute: converts every record of one batch file to a PNG.
        /// Errors are logged and end processing of this file only.
        /// </summary>
        /// <param name="read_file_path">Batch file to read.</param>
        /// <param name="write_file_dir">Base output folder.</param>
        /// <param name="kind_dir">Sub folder ("data" or "test") selecting the data set.</param>
        private void ExecuteFile(string read_file_path, string write_file_dir, string kind_dir)
        {
            Console.WriteLine($"Read File:{read_file_path}, Write Dir:{write_file_dir}/{kind_dir}");
            try
            {
                if (!File.Exists(read_file_path))
                {
                    Console.WriteLine($"{read_file_path} : read file none!");
                    return;
                }

                if (!Directory.Exists(write_file_dir))
                {
                    Console.WriteLine($"{write_file_dir} : write dir none!");
                    return;
                }

                using (FileStream fs = new FileStream(read_file_path, FileMode.Open, FileAccess.Read))
                using (BinaryReader br = new BinaryReader(fs))
                {
                    // Compare stream position with length for EOF. PeekChar() decodes
                    // the bytes as text and is not a reliable EOF test on binary data.
                    while (fs.Position < fs.Length)
                    {
                        // Record layout: 1 label byte, then the R, G and B planes.
                        byte label_id = br.ReadByte();
                        byte[] pixel_cifar_r = br.ReadBytes(PlaneSize);
                        byte[] pixel_cifar_g = br.ReadBytes(PlaneSize);
                        byte[] pixel_cifar_b = br.ReadBytes(PlaneSize);

                        // ReadBytes returns a shorter array at end of stream.
                        if (pixel_cifar_r.Length != PlaneSize ||
                            pixel_cifar_g.Length != PlaneSize ||
                            pixel_cifar_b.Length != PlaneSize)
                        {
                            Console.WriteLine($"{read_file_path} : truncated record, stopped.");
                            break;
                        }

                        string label_name = this.CnvLabelName(label_id);
                        string out_dir = $"{write_file_dir}/{kind_dir}/{label_name}";
                        if (label_id >= this.Cifar10LabelName.Length)
                        {
                            // The "other" folder is not pre-created; make it on first use.
                            Directory.CreateDirectory(out_dir);
                        }

                        byte[] pixel_bytes = this.CnvCifarToPixel(pixel_cifar_r, pixel_cifar_g, pixel_cifar_b);
                        int idx = this.GetFileCounter(label_id, kind_dir);
                        this.CnvImageFile($"{out_dir}/{idx.ToString("0000")}.png", pixel_bytes);
                        this.SetFileCounter(label_id, kind_dir);
                    }
                }
            }
            catch (Exception err)
            {
                string err_str = $"Exception:{err.Message}";
                Console.WriteLine(err_str);
            }
        }

        /// <summary>
        /// Writes interleaved B,G,R pixel data as a 32x32 PNG file.
        /// </summary>
        /// <param name="file_path">Destination PNG path.</param>
        /// <param name="pixel_bytes">3072 bytes, 3 bytes per pixel, row-major.</param>
        private void CnvImageFile(string file_path, byte[] pixel_bytes)
        {
            using (var bmp = new Bitmap(ImageSide, ImageSide, System.Drawing.Imaging.PixelFormat.Format24bppRgb))
            {
                var rect = new System.Drawing.Rectangle(0, 0, ImageSide, ImageSide);

                // The buffer is written to, so lock for writing.
                var bitmapData = bmp.LockBits(
                    rect,
                    System.Drawing.Imaging.ImageLockMode.WriteOnly,
                    System.Drawing.Imaging.PixelFormat.Format24bppRgb);
                try
                {
                    // Copy row by row and honour Stride, which may include padding.
                    int row_bytes = ImageSide * 3;
                    for (int y = 0; y < ImageSide; y++)
                    {
                        IntPtr row_start = IntPtr.Add(bitmapData.Scan0, y * bitmapData.Stride);
                        Marshal.Copy(pixel_bytes, y * row_bytes, row_start, row_bytes);
                    }
                }
                finally
                {
                    bmp.UnlockBits(bitmapData);
                }

                bmp.Save(file_path, System.Drawing.Imaging.ImageFormat.Png);
            }
        }

        /// <summary>
        /// Converts a label id to its label name.
        /// </summary>
        /// <param name="label_id">Label id read from the record.</param>
        /// <returns>The label name, or "other" when the id has no entry in the label file.</returns>
        private string CnvLabelName(byte label_id)
        {
            // '<=' : an id equal to Length is already out of range.
            if (this.Cifar10LabelName.Length <= label_id)
            {
                return OtherLabelName;
            }

            return this.Cifar10LabelName[label_id];
        }

        /// <summary>
        /// Cifar To Pixel: interleaves the three planes into B,G,R byte order per
        /// pixel, the order in which the data is copied into the 24bpp bitmap.
        /// </summary>
        /// <param name="src_r">cifar r</param>
        /// <param name="src_g">cifar g</param>
        /// <param name="src_b">cifar b</param>
        /// <returns>pixel</returns>
        private byte[] CnvCifarToPixel(byte[] src_r, byte[] src_g, byte[] src_b)
        {
            byte[] dst = new byte[PixelBytes];
            for (int i = 0; i < PlaneSize; i++)
            {
                dst[i * 3 + 2] = src_r[i];
                dst[i * 3 + 1] = src_g[i];
                dst[i * 3 + 0] = src_b[i];
            }

            return dst;
        }

        /// <summary>
        /// Selects the counter array of the data set identified by <paramref name="kind_dir"/>.
        /// </summary>
        private int[] SelectCounter(string kind_dir)
        {
            return kind_dir == this.Cifar10WriteTrainingDir
                ? this.Cifar10DataCounter
                : this.Cifar10TestCounter;
        }

        /// <summary>
        /// Maps a label id to its counter slot; unknown ids share the trailing "other" slot.
        /// </summary>
        private int CounterSlot(byte label_id)
        {
            return Math.Min(label_id, this.Cifar10LabelName.Length);
        }

        /// <summary>
        /// Get Counter
        /// </summary>
        /// <param name="label_id">Label id of the record.</param>
        /// <param name="kind_dir">Data set sub folder.</param>
        /// <returns>Number of files already written for this label and data set.</returns>
        private int GetFileCounter(byte label_id, string kind_dir)
        {
            return this.SelectCounter(kind_dir)[this.CounterSlot(label_id)];
        }

        /// <summary>
        /// Set Counter: increments the file count for this label and data set.
        /// </summary>
        /// <param name="label_id">Label id of the record.</param>
        /// <param name="kind_dir">Data set sub folder.</param>
        private void SetFileCounter(byte label_id, string kind_dir)
        {
            this.SelectCounter(kind_dir)[this.CounterSlot(label_id)]++;
        }
    }
}
変換ツールの実行
ビルドして実行すると、画面にボタンが表示されるので、クリックすると画像ファイルが作成されます(出力先は実行フォルダ直下の work10 フォルダです)。
実行前にexeの実行フォルダに、cifar-10-batches-binフォルダを配置してから実行してください。
参考
http://aidiary.hatenablog.com/entry/20151014/1444827123
https://blog.hatappi.me/entry/2018/02/27/224707