update notebooks1

drcandacemakedamoore · Aug 23, 2021 · d0180da · d0180da
1 parent 7143d8f
commit d0180da
Show file tree

Hide file tree

Showing 5 changed files with 265 additions and 694 deletions.
diff --git a/.gitignore b/.gitignore
@@ -16,4 +16,5 @@ test/target/
 /docs/cleanX.rst
 /docs/modules.rst
 /docs/cleanX.*.rst
-/workflow_demo/Coronahack-Chest-XRay-Dataset
+/workflow_demo/Coronahack-Chest-XRay-Dataset
+/workflow_demo/refined
diff --git a/workflow_demo/Untitled.ipynb b/workflow_demo/Untitled.ipynb
diff --git a/workflow_demo/duplicates_note.ipynb b/workflow_demo/duplicates_note.ipynb
diff --git a/workflow_demo/for_medical_people.ipynb b/workflow_demo/for_medical_people.ipynb
diff --git a/workflow_demo/no_classes_workflow_example.ipynb b/workflow_demo/no_classes_workflow_example.ipynb
@@ -56,8 +56,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# import sys\n",
-    "# sys.path = ['D:/projects/cleanX'] + sys.path"
+    "import sys\n",
+    "sys.path = ['D:/projects/cleanX'] + sys.path"
    ]
   },
   {
@@ -327,7 +327,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "origin_folder ='D:/my_academia/ncs/Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/train/'"
+    "origin_folder ='Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/train/'"
    ]
   },
   {
@@ -1074,7 +1074,7 @@
     }
    ],
    "source": [
-    "image_directory = 'D:/my_academia/ncs/Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/men'\n",
+    "image_directory = 'Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/men'\n",
     "df_unbias = cximg.dataframe_up_my_pics(image_directory, 'Normal')\n",
     "df_unbias\n",
     "# it's a start if we add these onto our training set"
@@ -1801,7 +1801,7 @@
     "#(set(a).intersection(b))\n",
     "new_fine_list = []\n",
     "for element in good_images_list:\n",
-    "    newel = element.replace('D:/my_academia/ncs/Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/train\\\\', '')\n",
+    "    newel = element.replace('Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/train\\\\', '')\n",
     "    #print(type(element))\n",
     "    new_fine_list.append(newel)"
    ]
@@ -1823,10 +1823,10 @@
    "source": [
     "better_trainer = example_train_csv[example_train_csv.X_ray_image_name.isin(new_fine_list)]\n",
     "\n",
-    "\n",
     "# now let's pick those up and throw them into a different folder wis the csv\n",
     "# can call that folder 'refined'\n",
-    "refined = \"D://refined\"\n",
+    "# note: the data deposited will not be pushed to (rather removed from) my github to save space\n",
+    "refined = \"refined\"\n",
     "if os.path.exists(refined):\n",
     "    print(\"folder for this seems to exist\")\n",
     "else:    \n",
@@ -1875,7 +1875,8 @@
    "source": [
     "# OK, now let's assume we have a good train and test set, and start some exploratory data analysis\n",
     "# we want to do it on the real images, not the frames so first we will crop\n",
-    "target_directory = 'D:/resized_to_same_size2'\n",
+    "# note: the image data deposited will not be pushed to (rather removed from) my github to save space\n",
+    "target_directory = 'resized_to_same_size2'\n",
     "cximg.crop_them_all(refined, target_directory)\n",
     "deflep = cximg.dimensions_to_df(target_directory)"
    ]
@@ -2118,7 +2119,9 @@
     "        Parameters for resize are not obvious\"\"\"\n",
     "    )    \n",
     "# define ta directory to be created\n",
-    "pathy = \"D://resized_to_same_size3\"\n",
+    "\n",
+    "# note: the data deposited will not be pushed to (rather removed from) my github to save space\n",
+    "pathy = \"resized_to_same_size3\"\n",
     "if os.path.exists(pathy):\n",
     "    print(\"folder for this seems to exist\")\n",
     "else:    \n",
@@ -2143,7 +2146,8 @@
     "original_files1 = glob.glob(os.path.join(origin_folder, '*.jpg'))\n",
     "original_files2 = glob.glob(os.path.join(origin_folder, '*.jpeg'))\n",
     "original_files = original_files1  + original_files2\n",
-    "target = 'D://resized_to_same_size3'\n",
+    "# note: the data deposited will not be pushed to (rather removed from) my github to save space\n",
+    "target = 'resized_to_same_size3'\n",
     "\n",
     "for f in original_files:\n",
     "    shutil.copy(f, target)"
@@ -2159,7 +2163,8 @@
     "# warning - this operation may take a relatively long time depending on number\n",
     "#%%time\n",
     "#of images - coffee break time!\n",
-    "g = r'D://resized_to_same_size3'\n",
+    "# note: the data deposited will not be pushed to (rather removed from) my github to save space\n",
+    "g = 'resized_to_same_size3'\n",
     "count = 0\n",
     "for file in os.listdir(g):\n",
     "    f_img = g+\"/\"+file\n",
@@ -2209,7 +2214,8 @@
    "source": [
     "# I just dont' care about blurry images today...but I could have pulled them\n",
     "# let's make an average image of all (jpeg) trains from our original data\n",
-    "train_image_directory = 'D://resized_to_same_size3'\n",
+    "# note: the data deposited will not be pushed to (rather removed from) my github to save space\n",
+    "train_image_directory = 'resized_to_same_size3'\n",
     "#lotus = cx.avg_image(example_train_csv,'X_ray_image_name', 'Label',g)\n",
     "lotus = cximg.avg_image_maker((glob.glob(os.path.join(train_image_directory, '*.jpeg'))))\n"
    ]
@@ -8026,16 +8032,14 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 44,
-   "id": "educated-senate",
+   "cell_type": "markdown",
+   "id": "92266b5e",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# interesting, there was some text, \n",
-    "# i.e. \"'RX DEL TORACE AL LETTO DEL PAZIENTE\"\n",
-    "# but not that said pneumonia, or COVID, let's move on to find ourlieing images by comparing to a tiny one.\n",
-    "# we can also make augmented images"
+    "interesting, there was some text, \n",
+    "i.e. \"'RX DEL TORACE AL LETTO DEL PAZIENTE\"\n",
+    "but not text that said pneumonia or COVID.\n",
+    "We can also make augmented images"
    ]
   },
   {
@@ -8047,23 +8051,25 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 45,
-   "id": "organic-circle",
+   "cell_type": "markdown",
+   "id": "54591922",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# rest of notebook, which should show our normalization functions and augmentation functions, reorganized into a better workflow coming soon"
+    "We have several functions for augmenting images. These include but are not limited to:\n",
+    "     subtle_sharpie_enhance\n",
+    "     salting\n",
+    "     simple_rotation_augmentation\n",
+    "     blur_out_edges\n",
+    "These functions are in the image_functions.py file. Feel free to ask for a different one in discussions. \n",
+    " "
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 46,
-   "id": "fc019447",
+   "cell_type": "markdown",
+   "id": "7d8c12fa",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# we will make sure to include out spiffy histogram based normalization"
+    "We have a spiffy histogram-based normalization, but it is easier to use with classes. Check out our other workflow, \"classes_workflow\", to see it in action."
    ]
   },
   {
@@ -8243,7 +8249,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -8257,7 +8263,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.0"
+   "version": "3.7.0"
   },
   "toc": {
    "base_numbering": 1,