209 lines
6.5 KiB
Plaintext
209 lines
6.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "b2e049c7-d5db-45e6-b651-2601c02f4b7d",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Data organisation example - IDRiD"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "16b65740-249b-4eef-9298-1db01f72d050",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import shutil\n",
|
|
"import pandas as pd\n",
|
|
"from sklearn.model_selection import train_test_split"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "b12bad44",
|
|
"metadata": {},
|
|
"source": []
|
|
},
|
|
{
 "attachments": {},
 "cell_type": "markdown",
 "id": "ff0bf26e-c657-49de-8761-89d5a94c390d",
 "metadata": {},
 "source": [
  "### Split a validation set from the training data\n",
  "- Download the dataset from the [official website](https://ieee-dataport.org/open-access/indian-diabetic-retinopathy-image-dataset-idrid).\n",
  "- Images can be pre-processed if necessary with any processing tool, such as [AutoMorph](https://github.com/rmaphoh/AutoMorph).\n",
  "- The cells below expect the training images as PNG files in `./train_processed/`, named after the `Image name` column, and the label CSV files in the working directory."
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "4bc1cb67-0adf-4640-8640-d0740a39366b",
 "metadata": {},
 "outputs": [],
 "source": [
  "# IDRiD disease-grading labels for the training images.\n",
  "list_ = pd.read_csv(filepath_or_buffer='IDRiD_Disease_Grading_Training_Labels.csv')"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "b85fc0d1-2049-4550-bdec-76240b1bc759",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Image names per retinopathy grade: grades 0..4 map, in order, to\n",
  "# noDR, mildDR, moderateDR, severeDR and proDR.\n",
  "noDR, mildDR, moderateDR, severeDR, proDR = (\n",
  "    list_.loc[list_['Retinopathy grade'] == grade, 'Image name']\n",
  "    for grade in range(5)\n",
  ")"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "d0617e35-8b91-45d3-90d5-d5e5bf2d7762",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Hold out 20% of every grade for validation, with a fixed seed so the split is repeatable.\n",
  "# Each grade is split on its own, so the 80/20 ratio applies within every class.\n",
  "(\n",
  "    (noDR_train, noDR_val),\n",
  "    (mildDR_train, mildDR_val),\n",
  "    (moderateDR_train, moderateDR_val),\n",
  "    (severeDR_train, severeDR_val),\n",
  "    (proDR_train, proDR_val),\n",
  ") = (\n",
  "    train_test_split(names, test_size=0.2, random_state=1)\n",
  "    for names in (noDR, mildDR, moderateDR, severeDR, proDR)\n",
  ")"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "f30ce03f-5730-4e68-b6c5-8e1b6b9167f8",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Copy the training images into one folder per grade under ./train/.\n",
  "train_folders = {\n",
  "    'a_noDR': noDR_train,\n",
  "    'b_mildDR': mildDR_train,\n",
  "    'c_moderateDR': moderateDR_train,\n",
  "    'd_severeDR': severeDR_train,\n",
  "    'e_proDR': proDR_train,\n",
  "}\n",
  "\n",
  "for folder, names in train_folders.items():\n",
  "    # shutil.copy does not create missing directories, so make the class folder first;\n",
  "    # exist_ok=True keeps the cell safe to re-run.\n",
  "    os.makedirs('./train/{}'.format(folder), exist_ok=True)\n",
  "    for i in names:\n",
  "        shutil.copy('./train_processed/{}.png'.format(i), './train/{}/{}.png'.format(folder, i))"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "196d1845-3e5e-4d38-82e5-66057a693962",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Copy the validation images into one folder per grade under ./val/.\n",
  "# The validation images are read from ./train_processed/ because they are a hold-out of the training set.\n",
  "val_folders = {\n",
  "    'a_noDR': noDR_val,\n",
  "    'b_mildDR': mildDR_val,\n",
  "    'c_moderateDR': moderateDR_val,\n",
  "    'd_severeDR': severeDR_val,\n",
  "    'e_proDR': proDR_val,\n",
  "}\n",
  "\n",
  "for folder, names in val_folders.items():\n",
  "    # shutil.copy does not create missing directories, so make the class folder first;\n",
  "    # exist_ok=True keeps the cell safe to re-run.\n",
  "    os.makedirs('./val/{}'.format(folder), exist_ok=True)\n",
  "    for i in names:\n",
  "        shutil.copy('./train_processed/{}.png'.format(i), './val/{}/{}.png'.format(folder, i))"
 ]
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "faf285f4-9079-49ca-9d99-8f3f5718afbf",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Organise test set"
|
|
]
|
|
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "118d15d0-9e94-4f6e-855d-dfa3796b24d2",
 "metadata": {},
 "outputs": [],
 "source": [
  "# IDRiD disease-grading labels for the test images.\n",
  "list_test = pd.read_csv(filepath_or_buffer='IDRiD_Disease_Grading_Testing_Labels.csv')"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "89a098fe-0aad-41d4-ab09-476ff0354c77",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Test image names per retinopathy grade: grades 0..4 map, in order, to\n",
  "# noDR_test, mildDR_test, moderateDR_test, severeDR_test and proDR_test.\n",
  "noDR_test, mildDR_test, moderateDR_test, severeDR_test, proDR_test = (\n",
  "    list_test.loc[list_test['Retinopathy grade'] == grade, 'Image name']\n",
  "    for grade in range(5)\n",
  ")"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "33a207c1-1fef-4e79-8ff2-84329062495b",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Copy the test images from ./test_processed/ into one folder per grade under ./test/.\n",
  "test_folders = {\n",
  "    'a_noDR': noDR_test,\n",
  "    'b_mildDR': mildDR_test,\n",
  "    'c_moderateDR': moderateDR_test,\n",
  "    'd_severeDR': severeDR_test,\n",
  "    'e_proDR': proDR_test,\n",
  "}\n",
  "\n",
  "for folder, names in test_folders.items():\n",
  "    # shutil.copy does not create missing directories, so make the class folder first;\n",
  "    # exist_ok=True keeps the cell safe to re-run.\n",
  "    os.makedirs('./test/{}'.format(folder), exist_ok=True)\n",
  "    for i in names:\n",
  "        shutil.copy('./test_processed/{}.png'.format(i), './test/{}/{}.png'.format(folder, i))"
 ]
}
|
|
],
|
|
"metadata": {
|
|
"environment": {
|
|
"kernel": "python3",
|
|
"name": "common-cu110.m91",
|
|
"type": "gcloud",
|
|
"uri": "gcr.io/deeplearning-platform-release/base-cu110:m91"
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|