diff --git a/Example.ipynb b/Example.ipynb
new file mode 100644
index 0000000..80eedf5
--- /dev/null
+++ b/Example.ipynb
@@ -0,0 +1,221 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "b2e049c7-d5db-45e6-b651-2601c02f4b7d",
+   "metadata": {},
+   "source": [
+    "## Data organisation example - IDRiD"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16b65740-249b-4eef-9298-1db01f72d050",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import shutil\n",
+    "import pandas as pd\n",
+    "from sklearn.model_selection import train_test_split"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c7a1d2e4-5b6f-4a8c-9d0e-1f2a3b4c5d6e",
+   "metadata": {},
+   "source": [
+    "### Configuration and helpers\n",
+    "- Destination folders are created on demand, so the notebook also runs on a fresh checkout."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e8f9a0b1-2c3d-4e5f-8a6b-7c8d9e0f1a2b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "GRADE_COLUMN = 'Retinopathy grade'\n",
+    "NAME_COLUMN = 'Image name'\n",
+    "\n",
+    "# Retinopathy grade to class folder (names sort alphabetically in grade order).\n",
+    "GRADE_FOLDERS = {\n",
+    "    0: 'a_noDR',\n",
+    "    1: 'b_mildDR',\n",
+    "    2: 'c_moderateDR',\n",
+    "    3: 'd_severeDR',\n",
+    "    4: 'e_proDR',\n",
+    "}\n",
+    "\n",
+    "VAL_FRACTION = 0.2  # share of each grade held out for validation\n",
+    "SEED = 1  # fixed seed keeps the train/val split reproducible"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0a1b2c3d-4e5f-4a6b-8c7d-9e0f1a2b3c4d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def names_by_grade(labels):\n",
+    "    \"\"\"Return {grade: Series of image names} for every grade in GRADE_FOLDERS.\"\"\"\n",
+    "    return {\n",
+    "        grade: labels.loc[labels[GRADE_COLUMN] == grade, NAME_COLUMN]\n",
+    "        for grade in GRADE_FOLDERS\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "def copy_images(image_names, src_dir, dst_dir):\n",
+    "    \"\"\"Copy {name}.png for each name from src_dir to dst_dir, creating dst_dir if needed.\"\"\"\n",
+    "    os.makedirs(dst_dir, exist_ok=True)  # shutil.copy fails if the folder is missing\n",
+    "    for name in image_names:\n",
+    "        filename = '{}.png'.format(name)\n",
+    "        shutil.copy(os.path.join(src_dir, filename), os.path.join(dst_dir, filename))\n",
+    "\n",
+    "\n",
+    "def copy_by_grade(names_per_grade, src_dir, dst_root):\n",
+    "    \"\"\"Copy every grade's images into its class folder under dst_root.\"\"\"\n",
+    "    for grade, image_names in names_per_grade.items():\n",
+    "        copy_images(image_names, src_dir, os.path.join(dst_root, GRADE_FOLDERS[grade]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ff0bf26e-c657-49de-8761-89d5a94c390d",
+   "metadata": {},
+   "source": [
+    "### Split val set from train data\n",
+    "- Images can be processed if necessary, with any processing tools such as [AutoMorph](https://github.com/rmaphoh/AutoMorph)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4bc1cb67-0adf-4640-8640-d0740a39366b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_labels = pd.read_csv('IDRiD_Disease_Grading_Training_Labels.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b85fc0d1-2049-4550-bdec-76240b1bc759",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_names = names_by_grade(train_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0617e35-8b91-45d3-90d5-d5e5bf2d7762",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split each grade on its own, so every grade appears in both train and val.\n",
+    "train_split, val_split = {}, {}\n",
+    "for grade, image_names in train_names.items():\n",
+    "    train_split[grade], val_split[grade] = train_test_split(\n",
+    "        image_names, test_size=VAL_FRACTION, random_state=SEED\n",
+    "    )\n",
+    "\n",
+    "# Every image must land in exactly one of the two subsets.\n",
+    "assert all(\n",
+    "    len(train_split[grade]) + len(val_split[grade]) == len(train_names[grade])\n",
+    "    for grade in GRADE_FOLDERS\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f30ce03f-5730-4e68-b6c5-8e1b6b9167f8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "copy_by_grade(train_split, './train_processed', './train')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "196d1845-3e5e-4d38-82e5-66057a693962",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "copy_by_grade(val_split, './train_processed', './val')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "faf285f4-9079-49ca-9d99-8f3f5718afbf",
+   "metadata": {},
+   "source": [
+    "### Organise test set"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "118d15d0-9e94-4f6e-855d-dfa3796b24d2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_labels = pd.read_csv('IDRiD_Disease_Grading_Testing_Labels.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "89a098fe-0aad-41d4-ab09-476ff0354c77",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_names = names_by_grade(test_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "33a207c1-1fef-4e79-8ff2-84329062495b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "copy_by_grade(test_names, './test_processed', './test')"
+   ]
+  }
+ ],
+ "metadata": {
+  "environment": {
+   "kernel": "python3",
+   "name": "common-cu110.m91",
+   "type": "gcloud",
+   "uri": "gcr.io/deeplearning-platform-release/base-cu110:m91"
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/README.md b/README.md index b58a51c..93449ce 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ pip install -r requirement.txt -- Organise data (use IDRiD as example) +- Organise data (use IDRiD as [example](Example.ipynb))