## Image analysis script for zebrafish GCTs
## Author: John Lafin
## email: john.lafin@utsouthwestern.edu
## Reads in GFP channel image files, applies thresholding, and calculates
## metrics
## Outputs thresholded images (for double checking) and a metrics table

import numpy as np
import pandas as pd
from skimage import io, filters, measure
import glob
import re
import os

## Read in files
file_list = glob.glob("./day_*/*ch01.tif")
img_list = []
for filename in file_list:
    img_list.append(io.imread(filename))
    
## Extract features from filenames
def get_names(f):
    day = re.search(r"\./day_\d+", f).group(0)
    day = re.search(r"\d+", day).group(0)
    fish = re.search(r"(ctrl|cddp)-\d", f).group(0)
    group = re.search(r"(ctrl|cddp)", fish).group(0)
    fish = re.search(r"\d", fish).group(0)
    return {"Filename": f, "Day": day, "Group": group, "Fish": fish}
    
names = pd.DataFrame([get_names(file) for file in file_list])

## Apply automatic thresholding
thresh_list = []
for img in img_list:
    thresh_otsu = filters.threshold_otsu(img[img != 0])
    binary_otsu = img > thresh_otsu
    thresh_list.append(binary_otsu)
    
## Save binary images
days = glob.glob("day*")
folders = ["./thresholds/" + day for day in days]
for folder in folders:
    os.makedirs(folder, exist_ok = True)
for i in range(len(thresh_list)):
    filename = "./thresholds" + names.loc[i, "Filename"][1:]
    io.imsave(filename, arr = thresh_list[i])
    
## Calculate metrics
def get_image_props(img, thresh):
    properties = ["area", "filled_area", "major_axis_length",
                  "minor_axis_length", "mean_intensity"]
    lbl = measure.label(thresh)
    props = measure.regionprops_table(lbl,
                                      intensity_image = img,
                                      properties = properties)
    df = pd.DataFrame(props)
    df = df.assign(integrated_density = lambda x: x.filled_area * x.mean_intensity)
    max_area = df[df.area == np.max(df.area)]
    return max_area
props_combined = pd.concat([get_image_props(img, thresh) for img,thresh in zip(img_list,thresh_list)])

## Combines features from names with metrics and save
total = pd.concat([names, props_combined.reset_index()], axis = 1)
total.to_csv("./skimage_metrics.csv")