The field of materials science heavily relies on electron diffraction patterns to characterize the crystal structure of materials. Accurately analyzing these patterns is critical, and the efficiency and accuracy of image processing algorithms play a significant role in this analysis. In order to improve the performance of these algorithms, this project aims to address several key challenges in the image processing of electron diffraction pattern datasets. The project will focus on developing a graph model for accurate point detection, adding synthetic noise to simulated patterns to make them more representative of real data, denoising the input images to enhance their accuracy, and evaluating the performance of the image processing algorithms on real data. The goal of this project is to contribute to the field of materials science by improving the accuracy and efficiency of electron diffraction pattern analysis.
Machine learning, statistical analysis, and image processing are some of the data science techniques that will be used. Specifically, the project involves creating a graph model for precise point detection, adding synthetic noise to simulated patterns so that they more closely resemble real data, and testing the effectiveness of the image processing algorithms on actual data.
a graduate student researcher at the Mesoscale Science Lab of Case Western Reserve University, to complete his master's thesis.
(Pu) and zirconium (Zr) alloy systems: Pu with 10 weight percent Zr (Pu-10Zr) and Pu with 30 weight percent Zr (Pu-30Zr).
patterns, 282 delta patterns, and 219 other patterns. The space group and corresponding lattice parameters for each pattern are also provided.
scaling the original dataset by a factor of 24.
8,736 alpha patterns, 6,768 delta patterns, and 5,256 other patterns.
from docx import Document
from tabulate import tabulate
document = Document('/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-Zr experimentally observed phases.docx')
tables = [table for table in document.tables]
table_data = {table.rows[0].cells[0].text.strip():
{"headers": [cell.text.strip() for cell in table.rows[1].cells],
"rows": [[cell.text.strip() for cell in row.cells] for row in table.rows[2:]]} for table in tables}
for table_name, table in table_data.items():
    print(f"{table_name}")
    print(tabulate(table['rows'], headers=table['headers'], tablefmt="grid"))
    print()
Pu-10Zr alloy
+-----------+-----------+------------+---------------+------------------------------------------+
| Phase     | As-cast   | Annealed   | Space group   | Lattice parameters                       |
+===========+===========+============+===============+==========================================+
| α-Zr      | X         | X          | P63/mmc       | a=b=0.32±0.001; c=0.51±0.001             |
+-----------+-----------+------------+---------------+------------------------------------------+
| ZrO2      | X         | X          | P21/c         | a=0.51±0.001; b=0.52±0.001; c=0.53±0.001 |
+-----------+-----------+------------+---------------+------------------------------------------+
| Zr3O      | X         | -          | R32 h         | a=b=0.56±0.001; c=3.14±0.001             |
+-----------+-----------+------------+---------------+------------------------------------------+
| Zr3O      | X         | -          | R-3c h        | a=b=0.56±0.001; c=1.57±0.001             |
+-----------+-----------+------------+---------------+------------------------------------------+
| PuO       | X         | -          | Fm-3m         | a=b=c=0.49±0.002                         |
+-----------+-----------+------------+---------------+------------------------------------------+
| PuO2      | -         | X          | Fm-3m         | a=b=c=0.53±0.002                         |
+-----------+-----------+------------+---------------+------------------------------------------+
| δ-(Pu,Zr) | X         | X          | Fm-3m         | a=b=c=0.45±0.001                         |
+-----------+-----------+------------+---------------+------------------------------------------+
| κ-PuZr2   | X         | -          | P6/mmm        | a=b=0.50±0.001; c=0.312±0.001            |
+-----------+-----------+------------+---------------+------------------------------------------+
| δ'-Pu     | -         | X          | Fm-3m         | a=b=c=0.46±0.003                         |
+-----------+-----------+------------+---------------+------------------------------------------+
| β-Pu      | -         | X          | C12/m1        | a=1.18±0.002; b=1.04±0.002; c=0.78±0.002 |
+-----------+-----------+------------+---------------+------------------------------------------+

Pu-30Zr alloy
+-----------+-----------+------------+---------------+------------------------------------------+
| Phase     | As-cast   | Annealed   | Space group   | Lattice parameters                       |
+===========+===========+============+===============+==========================================+
| α-Zr      | X         | X          | P63/mmc       | a=b=0.32±0.001; c=0.51±0.001             |
+-----------+-----------+------------+---------------+------------------------------------------+
| ZrO2      | X         | X          | P21/c         | a=0.51±0.001; b=0.52±0.001; c=0.53±0.001 |
+-----------+-----------+------------+---------------+------------------------------------------+
| δ-(Pu,Zr) | X         | X          | Fm-3m         | a=b=c=0.45±0.001                         |
+-----------+-----------+------------+---------------+------------------------------------------+
Phase: This column refers to the different phases or materials that were present in the alloy samples.
As-cast: This column indicates whether the sample was tested in its as-cast state, i.e., in the condition in which it was originally produced, without further heat treatment.
Annealed: This column indicates whether the sample was tested after annealing, which is a process that involves heating the sample
to a high temperature and then slowly cooling it down to change its properties.
Space group: This column indicates the specific crystal structure or arrangement of atoms in the material.
Lattice parameters: This column provides information on the size and shape of the crystal structure,
which is determined by the lattice parameters such as the length of the edges of the unit cell that make up the crystal.
import os
path = "/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata"
for item in os.scandir(path):
    if item.is_file():
        print("File: " + item.name)
    elif item.is_dir():
        print("Directory: " + item.name + "/")
        for sub_item in os.scandir(item.path):
            if sub_item.is_file():
                print("\tFile: " + sub_item.name)
            elif sub_item.is_dir():
                print("\tDirectory: " + sub_item.name + "/")
Directory: Pu-10Zr/
    Directory: 1A as-cast/
    Directory: 1A as-cast simulated/
    Directory: 1D as-cast/
    Directory: 1D as-cast simulated/
    Directory: 2A as-cast/
    Directory: 2A exp annealed/
    Directory: 2B exp annealed/
    Directory: 2C exp annealed/
    Directory: 2D as-cast/
    Directory: 2D exp annealed/
    File: Pu-10Zr info combined.xlsx
Directory: Pu-30Zr/
    Directory: 1A exp annealed/
    Directory: 1A exp as-cast/
    Directory: 1D exp as-cast/
    Directory: 2B exp as-cast/
    Directory: 2C exp as-cast/
    File: Pu-30Zr info combined.xlsx
File: Pu-Zr experimentally observed phases.docx
import pandas as pd
df = pd.read_excel("/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-10Zr/Pu-10Zr info combined.xlsx")
print(df.head())
  Folder name                           Image ID   Phase ID  Zone axis
0  1A as-cast   01_zxxx_CL175mm_x=-8.98, y=-7.75  δ-(Pu,Zr)      [001]
1  1A as-cast  01_zxxx_CL175mm_x=-23.44, y=-5.11  δ-(Pu,Zr)     [1-14]
2  1A as-cast   01_zxxx_CL175mm_x=-23.44, y=7.26  δ-(Pu,Zr)      [013]
3  1A as-cast      01_zxxx_CL175mm_x=-24, y=8.36  δ-(Pu,Zr)      [013]
4  1A as-cast    01_zxxx_CL175mm_x=1.62, y=16.66  δ-(Pu,Zr)    [0-1-3]
# Now let's look into the first folder
folder = '/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-10Zr/1A as-cast'
images = os.listdir(folder)
table = [[i+1, images[i]] for i in range(5)] + \
[[len(images)-4+j, images[len(images)-5+j]] for j in range(5)]
print(tabulate(table, headers=["Image ID", "Filename"]))
  Image ID  Filename
----------  --------------------------------------------------------------------------------------------------------
         1  01_zxxx_CL175mm_x=-23.44, y=-5.11.tif
         2  01_zxxx_CL175mm_x=-23.44, y=7.26.tif
         3  01_zxxx_CL175mm_x=-24, y=8.36.tif
         4  01_zxxx_CL175mm_x=-8.98, y=-7.75.tif
         5  01_zxxx_CL175mm_x=1.62, y=16.66.tif
        97  Transmission Electron Diffraction- 20_zxxx_CL175mm_x=0.25, y=6.1 Image delta-Pu,Zr [-1-1-2] Angle 50.tif
        98  Transmission Electron Diffraction- 20_zxxx_CL175mm_x=0.25, y=6.1 Image delta-Pu,Zr [-1-1-2] Angle 60.tif
        99  Transmission Electron Diffraction- 20_zxxx_CL175mm_x=0.25, y=6.1 Image delta-Pu,Zr [-1-1-2] Angle 70.tif
       100  Transmission Electron Diffraction- 20_zxxx_CL175mm_x=0.25, y=6.1 Image delta-Pu,Zr [-1-1-2] Angle 80.tif
       101  Transmission Electron Diffraction- 20_zxxx_CL175mm_x=0.25, y=6.1 Image delta-Pu,Zr [-1-1-2] Angle 90.tif
or they may refer to the position of the electron beam during imaging.
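As an aside, the x and y values embedded in these filenames can be recovered programmatically. The helper below is only an illustrative sketch that assumes the "x=..., y=..." naming convention visible in the listing above; the function name is hypothetical and not part of the project code.

import re

def parse_stage_coordinates(filename):
    # Extract the x and y values embedded in a diffraction-image filename,
    # e.g. '01_zxxx_CL175mm_x=-23.44, y=-5.11.tif'; returns None if absent.
    match = re.search(r'x=(-?\d+(?:\.\d+)?),\s*y=(-?\d+(?:\.\d+)?)', filename)
    if match is None:
        return None
    return float(match.group(1)), float(match.group(2))

print(parse_stage_coordinates('01_zxxx_CL175mm_x=-23.44, y=-5.11.tif'))  # (-23.44, -5.11)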
from IPython.display import display
import matplotlib.pyplot as plt
import os
def my_function():
    dir_path = "/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-10Zr/1A as-cast"
    files = os.listdir(dir_path)
    fig, axes = plt.subplots(1, 2, figsize=(14, 7))
    # Display the first image with a number prefix
    for i in range(len(files)):
        if files[i].startswith(('01', '02')):
            img = plt.imread(f'{dir_path}/{files[i]}')
            axes[0].imshow(img)
            axes[0].set_title(files[i][:20])
            break
    # Displaying the first image starting with "Transmission Electron Diffraction"
    for i in range(len(files)):
        if files[i].startswith('Transmission Electron Diffraction'):
            img = plt.imread(f'{dir_path}/{files[i]}')
            axes[1].imshow(img)
            axes[1].set_title(files[i][:20])
            break
    plt.show()
    return None
my_output = my_function()
display(my_output)
None
"Transmission Electron Diffraction".
import os
import tifffile
root_path = "/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/"
image_paths = []
for dirpath, dirnames, filenames in os.walk(root_path):
    for filename in filenames:
        if filename.endswith(".tif"):
            image_paths.append(os.path.join(dirpath, filename))
image = tifffile.imread(image_paths[0])
print(f"Image shape: {image.shape}")
print(f"Image data type: {image.dtype}")
print(f"Image max pixel value: {image.max()}")
print(f"Image min pixel value: {image.min()}")
Image shape: (1336, 2004)
Image data type: uint8
Image max pixel value: 255
Image min pixel value: 0
import numpy as np
image_mean = np.mean(image)
image_std = np.std(image)
print("Image mean:", image_mean)
print("Image standard deviation:", image_std)
Image mean: 48.26778777773794
Image standard deviation: 22.830947858093005
import matplotlib.pyplot as plt
plt.hist(image.ravel(), bins=256, range=(0, 255))
plt.xlabel('Pixel value')
plt.ylabel('Frequency')
plt.show()
import os
import tifffile
folder_path = "/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/"
image_properties = {}
# Looping over all subdirectories and images
for root, dirs, files in os.walk(folder_path):
    for file in files:
        if file.endswith(".tif"):
            image_path = os.path.join(root, file)
            image = tifffile.imread(image_path)
            image_shape = image.shape
            image_dtype = str(image.dtype)
            image_max = image.max()
            image_min = image.min()
            # Add the image properties to the dictionary
            image_key = (image_shape, image_dtype, image_max, image_min)
            if image_key in image_properties:
                image_properties[image_key] += 1
            else:
                image_properties[image_key] = 1
# Printing the number of images with each set of properties
for key, value in image_properties.items():
    print(f"{key}: {value} images")
((1336, 2004), 'uint8', 255, 0): 551 images
((844, 1543, 4), 'uint8', 255, 0): 251 images
((844, 1551, 4), 'uint8', 255, 0): 830 images
((1334, 2004), 'uint8', 255, 0): 327 images
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
# Loading the real and simulated images
real_image = np.array(Image.open('C:/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-10Zr/1A as-cast/20_zxxx_CL175mm_x=0.25, y=6.1.tif').convert('L'))
simulated_image = np.array(Image.open('C:/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-10Zr/1A as-cast/Transmission Electron Diffraction- 20_zxxx_CL175mm_x=0.25, y=6.1 Image delta-Pu,Zr [-1-1-2] Angle 00.tif').convert('L'))
# Printing the shape and data type of the images
print('Real image shape:', real_image.shape)
print('Real image data type:', real_image.dtype)
print('Simulated image shape:', simulated_image.shape)
print('Simulated image data type:', simulated_image.dtype)
# Displaying the images side by side
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 16))
ax[0].imshow(real_image, cmap='gray')
ax[0].set_title('Real Image')
ax[1].imshow(simulated_image, cmap='gray')
ax[1].set_title('Simulated Image')
plt.figure(figsize=(20, 20))
plt.show()
Real image shape: (1336, 2004)
Real image data type: uint8
Simulated image shape: (844, 1543)
Simulated image data type: uint8
<Figure size 2000x2000 with 0 Axes>
# Comparing the pixel values of the two images
real_hist, _ = np.histogram(real_image, bins=256)
simulated_hist, _ = np.histogram(simulated_image, bins=256)
plt.plot(real_hist, label='Real image')
plt.plot(simulated_hist, label='Simulated image')
plt.legend()
plt.show()
from scipy.stats import entropy
real_entropy = entropy(real_image)
simulated_entropy = entropy(simulated_image)
print('Real image entropy:', real_entropy)
print('Simulated image entropy:', simulated_entropy)
Real image entropy: [6.98613008 7.00299661 7.00963621 ... 7.06376016 7.06024204 7.06698228]
Simulated image entropy: [6.73801766 6.73797722 6.73797722 ... 6.73797722 6.73797722 6.73797722]
The entropy values indicate the amount of randomness or uncertainty in the image. Higher entropy values indicate greater randomness and unpredictability in the image. Therefore, it can be concluded that the real images have a higher degree of randomness or entropy compared to the simulated images.
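For reference, scipy.stats.entropy is applied here column by column, which is why it returns an array per image. A single whole-image number can also be obtained from the grey-level histogram; the snippet below is a minimal sketch of that calculation (the helper name is ours, and its values will differ from the column-wise numbers printed above).

import numpy as np

def histogram_entropy(image):
    # Shannon entropy of the grey-level distribution: H = -sum(p * log2(p))
    counts, _ = np.histogram(image.ravel(), bins=256, range=(0, 255))
    p = counts / counts.sum()
    p = p[p > 0]  # ignore empty bins to avoid log(0)
    return -np.sum(p * np.log2(p))

print('Real image histogram entropy:', histogram_entropy(real_image))
print('Simulated image histogram entropy:', histogram_entropy(simulated_image))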
Next, we can analyze the spatial frequency of the two images. One way to do this is by calculating the Discrete Fourier Transform (DFT) of the images, which will give us an idea of the frequency content in the images.
import numpy as np
import matplotlib.pyplot as plt
# Calculating the 2D DFT of the images
real_dft = np.fft.fft2(real_image)
simulated_dft = np.fft.fft2(simulated_image)
# Shifting the DC component to the center of the spectrum
real_dft = np.fft.fftshift(real_dft)
simulated_dft = np.fft.fftshift(simulated_dft)
# Calculating the magnitude spectrum of the DFT
real_spectrum = 20 * np.log10(np.abs(real_dft))
simulated_spectrum = 20 * np.log10(np.abs(simulated_dft))
# Displaying the magnitude spectra
plt.figure(figsize=(12, 12))
plt.subplot(1, 2, 1)
plt.imshow(real_spectrum, cmap='gray')
plt.title('Real image spectrum')
plt.subplot(1, 2, 2)
plt.imshow(simulated_spectrum, cmap='gray')
plt.title('Simulated image spectrum')
Text(0.5, 1.0, 'Simulated image spectrum')
import os
from PIL import Image
folder_path = "C:/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/"
image_properties = {}
# Loop over all subdirectories and images
for root, dirs, files in os.walk(folder_path):
    for file in files:
        if file.endswith(".tif"):
            image_path = os.path.join(root, file)
            image = Image.open(image_path)
            image_shape = image.size
            image_dtype = str(image.mode)
            image_max = image.getextrema()[1]
            image_min = image.getextrema()[0]
            # Add the image properties to the dictionary
            image_key = (image_shape, image_dtype, image_max, image_min)
            if image_key in image_properties:
                image_properties[image_key] += 1
            else:
                image_properties[image_key] = 1
# Print the number of images with each set of properties
for key, value in image_properties.items():
    print(f"{key}: {value} images")
((2004, 1336), 'P', 255, 0): 551 images
((1543, 844), 'RGBA', (0, 255), (0, 255)): 251 images
((1551, 844), 'RGBA', (0, 255), (0, 255)): 830 images
((2004, 1334), 'P', 255, 0): 327 images
import os
import numpy as np
from PIL import Image
from scipy.stats import entropy
rawdata_dir = r'C:\Users\iuruc\OneDrive\Desktop\GIT\project\rawdata'
image_properties = {}
with open('image_properties.txt', 'w') as f:
    for root, dirs, files in os.walk(rawdata_dir):
        for file in files:
            if file.endswith('.tif'):
                # Load the image
                image_path = os.path.join(root, file)
                image = Image.open(image_path)
                image_array = np.array(image)
                # Get the image properties
                image_shape = image_array.shape
                image_dtype = image_array.dtype
                image_max = np.max(image_array)
                image_min = np.min(image_array)
                image_entropy = np.mean(entropy(image_array, axis=(0,1)))
                # Add the image properties to the dictionary
                # Note: the entropy stored for each key is that of the first image
                # encountered with those properties, so every image in the same
                # property group is written out with the same entropy value
                image_key = (image_shape, image_dtype, image_max, image_min)
                if image_key in image_properties:
                    image_properties[image_key]['count'] += 1
                else:
                    image_properties[image_key] = {'count': 1, 'entropy': image_entropy}
                # Write the image properties to the file
                f.write(f'File: {image_path}\n')
                f.write(f'Properties: {image_key}\n')
                f.write(f'Count: {image_properties[image_key]["count"]}\n')
                f.write(f'Entropy: {image_properties[image_key]["entropy"]:.2f}\n\n')
This code is used to iterate through a directory containing TIFF images, extract properties of each image such as its shape, data type, maximum and minimum pixel values, and its average entropy. The image properties are stored in a dictionary, and written to a file named "image_properties.txt". For each image, the file records the image path, properties, the count of images with the same properties, and the average entropy value rounded to 2 decimal places.
# First, we count the total number of .tif images under the rawdata directory by walking through all of its subfolders.
import os
rawdata_dir = r'C:\Users\iuruc\OneDrive\Desktop\GIT\project\rawdata'
image_count = 0
for root, dirs, files in os.walk(rawdata_dir):
    for file in files:
        if file.endswith('.tif'):
            image_count += 1
print(f'Total number of images: {image_count}')
Total number of images: 1959
# What we did here is we counted the number of lines in the image_properties.txt file and divided it by 4 since we wrote 4 lines of information for each image.
# This allowed us to find the total number of images for which we have recorded properties.
with open('image_properties.txt', 'r') as f:
    line_count = len([line for line in f if line.strip()])
image_count = line_count // 4
print(f"Total number of images with recorded properties: {image_count}")
Total number of images with recorded properties: 1959
import re
entropy_count = {}
with open('image_properties.txt', 'r') as f:
    for line in f:
        if line.startswith('Entropy'):
            entropy_matches = re.findall(r'[\d.]+', line)
            if entropy_matches:
                entropy_value = float(entropy_matches[0])
                if entropy_value in entropy_count:
                    entropy_count[entropy_value] += 1
                else:
                    entropy_count[entropy_value] = 1
print(entropy_count)
{14.7: 551, 13.69: 251, 13.85: 830, 14.52: 327}
In this code, we counted the number of images with different entropy values by extracting the entropy values from the 'image_properties.txt' file using regular expressions and then adding them to a dictionary to keep track of their counts. The resulting dictionary shows the number of images with each entropy value. Upon examining the images with different entropy values, we found that images with entropy values greater than 14 are real images.
Now that we can distinguish between real and simulated images based on their entropy values, we can start working on developing a graph model for locating the points in electron diffraction patterns. We will first crop the images to a resolution of 512x512 pixels to reduce the processing time and ensure that the model is trained on consistent image sizes. We will then use various image processing techniques to locate the center of the diffraction pattern and accurately crop the image around it to ensure that no important information is lost during the process. This model will enable us to automate the process of locating the points in electron diffraction patterns and expedite the analysis of the data.
labels = np.load('C:/Users/iuruc/OneDrive/Desktop/GIT/project/original_dataset_labels.npy')
counts = np.bincount(labels)
print(counts)
[364 282 219]
This next code is a set of functions that detect and visualize diffraction patterns in images using OpenCV library. It includes four functions:
preprocess_image(image): This function takes a PIL image and preprocesses it to grayscale using OpenCV's cv2.cvtColor() function if it is not already in grayscale.
detect_diffraction_pattern(image_path, param1=50, param2=30, min_radius=0, max_radius=0): This function reads an image file from a specified path and detects circles in the image using OpenCV's cv2.HoughCircles() function with the specified parameters.
visualize_circle_detection(image_path, circles): This function loads the original image and overlays the detected circles on it using OpenCV's cv2.circle() function. It then displays the resulting image using Matplotlib's plt.imshow() function.
process_directory(directory_path, param1=50, param2=30, min_radius=0, max_radius=0): This function takes a directory path and iterates over all files in the directory with a .tif or .tiff extension. For each image file, it calls detect_diffraction_pattern() and passes the resulting circles to visualize_circle_detection() to display the image with detected circles. It only processes up to five images and breaks out of the loop after that.
import os
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
def preprocess_image(image):
    # Converting the PIL image to a NumPy array
    image_array = np.array(image)
    # Checking if the image is already in grayscale
    if len(image_array.shape) == 2:
        gray_image = image_array
    else:
        # Converting the image to grayscale
        gray_image = cv2.cvtColor(image_array, cv2.COLOR_BGR2GRAY)
    return gray_image

def detect_diffraction_pattern(image_path, param1=50, param2=30, min_radius=0, max_radius=0):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    preprocessed_image = preprocess_image(image)
    blurred_image = cv2.GaussianBlur(preprocessed_image, (9, 9), 2)
    circles = cv2.HoughCircles(blurred_image, cv2.HOUGH_GRADIENT, 1, 20,
                               param1=param1, param2=param2,
                               minRadius=min_radius, maxRadius=max_radius)
    return circles

def visualize_circle_detection(image_path, circles):
    # Loading the image
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Drawing the detected circles
    if circles is not None:
        circles = np.uint16(np.around(circles))
        for i in circles[0, :]:
            cv2.circle(image, (i[0], i[1]), i[2], (0, 255, 0), 2)
            cv2.circle(image, (i[0], i[1]), 2, (0, 0, 255), 3)
    # Displaying the image with the detected circles
    plt.imshow(image)
    plt.title('Detected Circles')
    plt.axis('off')
    plt.show()

def process_directory(directory_path, param1=50, param2=30, min_radius=0, max_radius=0):
    count = 0
    for filename in os.listdir(directory_path):
        if count >= 5:
            break
        if filename.lower().endswith(('.tif', '.tiff')):
            image_path = os.path.join(directory_path, filename)
            circles = detect_diffraction_pattern(image_path, param1, param2, min_radius, max_radius)
            print(f"Processing {image_path}")
            if count < 5:
                visualize_circle_detection(image_path, circles)
            count += 1
directory_path = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-10Zr/1A as-cast'
process_directory(directory_path, param1=35, param2=25, min_radius=1, max_radius=15)
Processing C:/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-10Zr/1A as-cast\01_zxxx_CL175mm_x=-23.44, y=-5.11.tif
Processing C:/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-10Zr/1A as-cast\01_zxxx_CL175mm_x=-23.44, y=7.26.tif
Processing C:/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-10Zr/1A as-cast\01_zxxx_CL175mm_x=-24, y=8.36.tif
Processing C:/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-10Zr/1A as-cast\01_zxxx_CL175mm_x=-8.98, y=-7.75.tif
Processing C:/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-10Zr/1A as-cast\01_zxxx_CL175mm_x=1.62, y=16.66.tif
def calculate_center(circles):
    if circles is None:
        return None
    centers = np.array([[circle[0], circle[1]] for circle in circles[0]])
    center = np.mean(centers, axis=0)
    return center

def crop_image(image, center, size=512):
    y, x = int(center[1]), int(center[0])
    half_size = size // 2
    cropped_image = image[y - half_size:y + half_size, x - half_size:x + half_size]
    return cropped_image

def process_and_crop_directory(directory_path, output_dir, param1=50, param2=30, min_radius=0, max_radius=0):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for filename in os.listdir(directory_path):
        if filename.lower().endswith(('.tif', '.tiff')):
            image_path = os.path.join(directory_path, filename)
            circles = detect_diffraction_pattern(image_path, param1, param2, min_radius, max_radius)
            center = calculate_center(circles)
            if center is not None:
                image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
                cropped_image = crop_image(image, center, size=512)
                output_path = os.path.join(output_dir, filename)
                cv2.imwrite(output_path, cropped_image)
            else:
                print(f"Could not find a center for {image_path}")
input_directory = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/Pu-10Zr/1A as-cast'
output_directory = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/processed_images'
process_and_crop_directory(input_directory, output_directory, param1=35, param2=25, min_radius=1, max_radius=15)
import shutil
source_directory = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata/'
destination_directory = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/allin/'
if not os.path.exists(destination_directory):
    os.makedirs(destination_directory)
for root, dirs, files in os.walk(source_directory):
    for filename in files:
        if filename.lower().endswith(('.tif', '.tiff')):
            source_path = os.path.join(root, filename)
            destination_path = os.path.join(destination_directory, filename)
            shutil.copy(source_path, destination_path)
This code is used to copy all the files with the extensions ".tif" or ".tiff" from a source directory and its subdirectories to a destination directory. The script checks for the existence of the destination directory, and if it does not exist, it creates it. Then, it uses the shutil library's copy() function to copy each file from the source directory to the destination directory.
image_properties_file = 'image_properties.txt'
rawdata_directory = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/rawdata'
real_images_directory = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/real_images'
simulated_images_directory = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/simulated_images'
if not os.path.exists(real_images_directory):
    os.makedirs(real_images_directory)
if not os.path.exists(simulated_images_directory):
    os.makedirs(simulated_images_directory)
with open(image_properties_file, 'r') as f:
    for line in f:
        if line.startswith('File'):
            relative_path = line.split(':')[-1].strip()
            source_path = os.path.join(rawdata_directory, relative_path)
        elif line.startswith('Entropy'):
            entropy_matches = re.findall(r'[\d.]+', line)
            if entropy_matches:
                entropy_value = float(entropy_matches[0])
                if entropy_value >= 14.0:
                    destination_path = os.path.join(real_images_directory, os.path.basename(relative_path))
                else:
                    destination_path = os.path.join(simulated_images_directory, os.path.basename(relative_path))
                shutil.copy(source_path, destination_path)
This code reads a text file containing properties of electron diffraction pattern images and copies them to either a 'real_images' or 'simulated_images' directory based on their entropy value.
input_directory = "C:/Users/iuruc/OneDrive/Desktop/GIT/project/real_images"
output_directory = "C:/Users/iuruc/OneDrive/Desktop/GIT/project/real_images_png/"
if not os.path.exists(output_directory):
    os.makedirs(output_directory)
for filename in os.listdir(input_directory):
    if filename.lower().endswith(".tif") or filename.lower().endswith(".tiff"):
        input_filepath = os.path.join(input_directory, filename)
        output_filepath = os.path.join(output_directory, os.path.splitext(filename)[0] + ".png")
        img = Image.open(input_filepath)
        img.save(output_filepath)
This code is a simple script that converts all TIFF images in a specified directory to PNG format.
import json
# Loading the JSON file
with open('via_project_22Mar2023_23h40m_json.json') as f:
    data = json.load(f)
# Extracting the annotations
annotations = data.values()
# Printing the first 10 annotations
for i, annotation in enumerate(annotations):
    if i >= 10:
        break
    print(annotation)
    print()
{'filename': '01_zxx_CL175mm_x=-0.79, b=-23.65.png', 'size': 2035958, 'regions': [{'shape_attributes': {'name': 'point', 'cx': 776, 'cy': 540}, 'region_attributes': {'center': 'center point'}}], 'file_attributes': {}}
{'filename': '01_zxx_CL175mm_x=13.26, b=0.png', 'size': 2040093, 'regions': [{'shape_attributes': {'name': 'point', 'cx': 960, 'cy': 705}, 'region_attributes': {'center': 'center point'}}], 'file_attributes': {}}
{'filename': '01_zxx_CL175mm_x=29.56, b=-15.23.png', 'size': 2062162, 'regions': [{'shape_attributes': {'name': 'point', 'cx': 1049, 'cy': 778}, 'region_attributes': {'center': 'center point'}}], 'file_attributes': {}}
{'filename': '01_zxxx_CL175mm_x=-0.19, y=6.16.png', 'size': 1788521, 'regions': [{'shape_attributes': {'name': 'point', 'cx': 954, 'cy': 565}, 'region_attributes': {'center': 'center point'}}], 'file_attributes': {}}
{'filename': '01_zxxx_CL175mm_x=-1.08, y=-11.16.png', 'size': 1864541, 'regions': [{'shape_attributes': {'name': 'point', 'cx': 964, 'cy': 622}, 'region_attributes': {'center': 'center point'}}], 'file_attributes': {}}
{'filename': '01_zxxx_CL175mm_x=1.62, y=16.66.png', 'size': 1953477, 'regions': [{'shape_attributes': {'name': 'point', 'cx': 996, 'cy': 729}, 'region_attributes': {'center': 'center point'}}], 'file_attributes': {}}
{'filename': '01_zxxx_CL175mm_x=-1.74, y=10.72.png', 'size': 1816279, 'regions': [{'shape_attributes': {'name': 'point', 'cx': 951, 'cy': 508}, 'region_attributes': {'center': 'center point'}}], 'file_attributes': {}}
{'filename': '01_zxxx_CL175mm_x=2.5, y=-14.13.png', 'size': 1810537, 'regions': [{'shape_attributes': {'name': 'point', 'cx': 919, 'cy': 511}, 'region_attributes': {'center': 'center point'}}], 'file_attributes': {}}
{'filename': '01_zxxx_CL175mm_x=-2.7, y=-0.82.png', 'size': 1736244, 'regions': [{'shape_attributes': {'name': 'point', 'cx': 919, 'cy': 552}, 'region_attributes': {'center': 'center point'}}], 'file_attributes': {}}
{'filename': '01_zxxx_CL175mm_x=-2.11, y=1.37.png', 'size': 1997791, 'regions': [{'shape_attributes': {'name': 'point', 'cx': 1033, 'cy': 581}, 'region_attributes': {'center': 'center point'}}], 'file_attributes': {}}
# Loading the JSON file
with open('via_project_22Mar2023_23h40m.json') as f:
    data = json.load(f)
# Separating the metadata for labeled and unlabeled images
labeled_metadata = {}
unlabeled_metadata = {}
for filename, metadata in data['_via_img_metadata'].items():
    if metadata.get('regions'):
        labeled_metadata[filename] = metadata
    else:
        unlabeled_metadata[filename] = metadata
# Printing the number of labeled and unlabeled images
print("Number of labeled images:", len(labeled_metadata))
print("Number of unlabeled images:", len(unlabeled_metadata))
Number of labeled images: 700
Number of unlabeled images: 178
This code loads the VIA project JSON file and splits its image metadata into two dictionaries: images with at least one annotated region are treated as labeled, and images without any regions as unlabeled. Finally, the code prints the number of labeled and unlabeled images.
# let's start by loading the labeled images and their annotations.
# Loading the JSON file containing the annotations
with open('via_project_22Mar2023_23h40m.json') as f:
    data = json.load(f)
# Creating a list of labeled images and their corresponding annotations
labeled_images = []
# Assuming the annotated PNGs are the ones exported earlier to real_images_png
png_directory = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/real_images_png/'
for key, metadata in data['_via_img_metadata'].items():
    if metadata.get('regions'):
        # The VIA metadata key concatenates filename and file size,
        # so the actual filename is taken from the metadata itself
        image_path = os.path.join(png_directory, metadata['filename'])
        image = cv2.imread(image_path)
        # Extracting the annotations
        annotations = []
        for region in metadata['regions']:
            shape_attributes = region['shape_attributes']
            center_x = shape_attributes['cx']
            center_y = shape_attributes['cy']
            annotations.append((center_x, center_y))
        labeled_images.append((image, annotations))
Each element in the list will be a tuple of the form (image, annotations), where image is a NumPy array representing the image and annotations is a list of tuples representing the coordinates of the centers of the patterns in the image.
for i in range(3):
    image, annotations = labeled_images[i]
    print("Annotations for image", i+1)
    print(annotations)
    print()
import csv
# Load the JSON file containing the annotations
with open('via_project_22Mar2023_23h40m.json') as f:
    data = json.load(f)
# Creating a list of center points and filenames
center_points = []
for filename, metadata in data['_via_img_metadata'].items():
    if metadata.get('regions'):
        # Extracting the center point coordinates
        for region in metadata['regions']:
            shape_attributes = region['shape_attributes']
            center_x = shape_attributes['cx']
            center_y = shape_attributes['cy']
            # Replacing the file extension with .tif (splitext also strips the
            # file size that VIA appends to the metadata key after '.png')
            filename = os.path.splitext(filename)[0] + '.tif'
            center_points.append((filename, center_x, center_y))
# Saving the center points to a CSV file
with open('center_points.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['filename', 'center_x', 'center_y'])
    writer.writerows(center_points)
Next, we match the annotations with the original images: the filenames and center coordinates are exported to center_points.csv, and the corresponding .tif files in the real_images directory are then split into labeled and unlabeled folders. This allows the annotated images to be used as training data for machine learning models.
os.makedirs('C:/Users/iuruc/OneDrive/Desktop/GIT/project/labeled_images', exist_ok=True)
os.makedirs('C:/Users/iuruc/OneDrive/Desktop/GIT/project/unlabeled_images', exist_ok=True)
import re
import os
import shutil
labeled_folder = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/labeled_images'
with open('center_points.csv', 'r') as f:
    lines = f.readlines()
for line in lines[1:]:
    filename_match = re.search(r'\"?(.+\.tif)\"?', line)
    if filename_match:
        filename = filename_match.group(1)
        src_path = os.path.join('C:/Users/iuruc/OneDrive/Desktop/GIT/project/real_images', filename)
        dst_path = os.path.join(labeled_folder, filename)
        shutil.copy2(src_path, dst_path)
    else:
        print(f"Failed to copy file for line: {line}")
unlabeled_folder = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/unlabeled_images'
real_folder = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/real_images'
# Getting a list of all the images in the real_images folder
real_images = os.listdir(real_folder)
# Getting a list of all the labeled images in the labeled_images folder
labeled_images = os.listdir(labeled_folder)
# Copying the unlabeled images to the unlabeled_images folder
for image in real_images:
    if image not in labeled_images:
        src_path = os.path.join(real_folder, image)
        dst_path = os.path.join(unlabeled_folder, image)
        shutil.copy2(src_path, dst_path)
This code copies the labeled images from a "center_points.csv" file to a "labeled_images" folder, and then copies the remaining unlabeled images from a "real_images" folder to an "unlabeled_images" folder.
It looks like there are two unique image sizes in our dataset: (2004, 1334) and (2004, 1336). Since there are only two different sizes and the difference in height is minimal (only 2 pixels), I will resize all images to the common size (2004, 1334).
import os
import cv2
image_directory = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/labeled_images'
image_files = [f for f in os.listdir(image_directory) if f.endswith('.tif')]
target_size = (2004, 1334)
for image_file in image_files:
    image_path = os.path.join(image_directory, image_file)
    image = cv2.imread(image_path)
    resized_image = cv2.resize(image, target_size)
    cv2.imwrite(image_path, resized_image)
import os
import cv2
image_directory = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/labeled_images'
# Getting a list of all image file names in the directory
image_files = [f for f in os.listdir(image_directory) if f.endswith('.tif')]
image_sizes = set()
for image_file in image_files:
    image_path = os.path.join(image_directory, image_file)
    image = cv2.imread(image_path)
    height, width = image.shape[:2]
    image_sizes.add((width, height))
print("Unique image sizes:", image_sizes)
Unique image sizes: {(2004, 1334)}
import os
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
def preprocess_image(image):
    image_array = np.array(image)
    if len(image_array.shape) == 2:
        gray_image = image_array
    else:
        gray_image = cv2.cvtColor(image_array, cv2.COLOR_BGR2GRAY)
    return gray_image

def detect_diffraction_pattern(image_path, param1=35, param2=25, min_radius=1, max_radius=15):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    preprocessed_image = preprocess_image(image)
    blurred_image = cv2.GaussianBlur(preprocessed_image, (9, 9), 2)
    circles = cv2.HoughCircles(blurred_image, cv2.HOUGH_GRADIENT, 1, 20,
                               param1=param1, param2=param2,
                               minRadius=min_radius, maxRadius=max_radius)
    return circles

def calculate_center(circles):
    if circles is None or len(circles[0]) < 3:
        return None
    centers = np.array([[circle[0], circle[1]] for circle in circles[0]])
    center = np.mean(centers, axis=0)
    return center

def crop_image_safe(image, center, size=512):
    h, w = image.shape
    half_size = size // 2
    if center is None:
        x, y = w // 2, h // 2
    else:
        y, x = int(center[1]), int(center[0])
    y_min, y_max = max(0, y - half_size), min(h, y + half_size)
    x_min, x_max = max(0, x - half_size), min(w, x + half_size)
    cropped_image = image[y_min:y_max, x_min:x_max]
    return cropped_image

def process_and_crop_directory(directory_path, output_dir, param1=50, param2=30, min_radius=0, max_radius=0):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for filename in os.listdir(directory_path):
        if filename.lower().endswith(('.tif', '.tiff')):
            image_path = os.path.join(directory_path, filename)
            circles = detect_diffraction_pattern(image_path, param1, param2, min_radius, max_radius)
            center = calculate_center(circles)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            cropped_image = crop_image_safe(image, center, size=512)
            output_path = os.path.join(output_dir, filename)
            cv2.imwrite(output_path, cropped_image)
input_directory = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/real_images'
output_directory = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/cropped_real_images'
process_and_crop_directory(input_directory, output_directory, param1=35, param2=25, min_radius=1, max_radius=15)
In this Python script, we process a directory of electron diffraction pattern images by locating each pattern's center and cropping the images to a standard size. To improve circle detection, the script first converts the images to grayscale and applies a Gaussian blur. OpenCV's HoughCircles function is then used to find circles in the blurred images, and the mean of their positions is taken as the pattern center. Finally, the script crops each image around the estimated center (falling back to the image center when too few circles are found) and saves the result to the specified output directory. By centering the patterns and standardizing the image size, this procedure speeds up the subsequent analysis of electron diffraction patterns.
Load the labeled data: We define the paths to the labeled images and the center points CSV file. Then, we load the center points CSV file into a Pandas DataFrame.
Preprocess the labeled data: We create a list of labeled data, where each entry contains an image and its corresponding center point coordinates. We iterate through the center points DataFrame, load each image using PIL, and store the image along with its center point coordinates in the labeled data list. We then convert the labeled data list into a DataFrame.
Preprocess the images: We resize each image to 128x128 pixels, normalize the pixel values by dividing them by 255.0, and flatten each image into a 1D array. We store the resulting arrays in a NumPy array X, and the center points in a NumPy array y.
Split the data: We split the data into training and testing sets using an 80/20 split (80% of the data for training and 20% for testing).
Train and evaluate machine learning models: We define a list of regression models, including linear regression, random forest, support vector regression, and a multi-layer perceptron. For each model, we separately train two instances of the model, one for predicting the x-coordinate and the other for predicting the y-coordinate. We then use the trained models to predict the coordinates on the test set and calculate the root mean squared error (RMSE) for both x and y coordinates. We also calculate the average RMSE for each model.
This code chunk helps us determine the performance of different regression models on the task of predicting the center points of the images. Based on the results, we can choose the best performing model for our task.
import os
import pandas as pd
from PIL import Image
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
# Defining the paths to the labeled images and the center points CSV file
labeled_images_path = 'C:/Users/iuruc/OneDrive/Desktop/GIT/project/labeled_images'
center_points_path = 'center_points.csv'
# Loading the center points CSV file into a Pandas DataFrame
center_points_df = pd.read_csv(center_points_path)
# Creating a list to store the images and their corresponding center points
labeled_data = []
# Iterating through the center points DataFrame
for index, row in center_points_df.iterrows():
    # Get the filename and center point coordinates from the row
    filename = row['filename']
    center_x = row['center_x']
    center_y = row['center_y']
    # Loading the image using PIL
    image_path = os.path.join(labeled_images_path, filename)
    image = Image.open(image_path)
    # Adding the image and its center coordinates to the labeled data list
    labeled_data.append({'image': image, 'center': (center_x, center_y)})
# Converting the labeled_data list to a DataFrame
labeled_data_df = pd.DataFrame(labeled_data)
# Preprocessing images (resize, normalize, and flatten)
processed_images = [cv2.resize(np.array(img), (128, 128)) for img in labeled_data_df['image']]
normalized_images = [img / 255.0 for img in processed_images]
flattened_images = [img.flatten() for img in normalized_images]
X = np.array(flattened_images)
y = np.array(labeled_data_df['center'].tolist())
# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
models = [
("Linear Regression", LinearRegression()),
("Random Forest", RandomForestRegressor(n_estimators=100)),
("Support Vector Regression", SVR(kernel="linear")),
("Multi-layer Perceptron", MLPRegressor(hidden_layer_sizes=(128, 64), max_iter=1000, learning_rate_init=0.01)),
]
y_train_x = y_train[:, 0]
y_train_y = y_train[:, 1]
y_test_x = y_test[:, 0]
y_test_y = y_test[:, 1]
from sklearn.base import clone

for name, model in models:
    # Train one copy of the model for the x-coordinate and a separate copy for the y-coordinate
    model_x = clone(model)
    model_x.fit(X_train, y_train_x)
    y_pred_x = model_x.predict(X_test)
    mse_x = mean_squared_error(y_test_x, y_pred_x)
    rmse_x = np.sqrt(mse_x)
    model_y = clone(model)
    model_y.fit(X_train, y_train_y)
    y_pred_y = model_y.predict(X_test)
    mse_y = mean_squared_error(y_test_y, y_pred_y)
    rmse_y = np.sqrt(mse_y)
    rmse_avg = (rmse_x + rmse_y) / 2
    print(f"{name} RMSE x: {rmse_x:.2f}, RMSE y: {rmse_y:.2f}, Avg RMSE: {rmse_avg:.2f}")
Linear Regression RMSE x: 46.79, RMSE y: 46.57, Avg RMSE: 46.68
Random Forest RMSE x: 58.41, RMSE y: 56.45, Avg RMSE: 57.43
Support Vector Regression RMSE x: 44.21, RMSE y: 45.11, Avg RMSE: 44.66
Multi-layer Perceptron RMSE x: 112.13, RMSE y: 72.58, Avg RMSE: 92.35
From the results, Support Vector Regression has the lowest average RMSE (44.66), indicating that it performs best out of the four models evaluated. Linear Regression follows closely (46.68), Random Forest is noticeably worse (57.43), and the Multi-layer Perceptron has the highest RMSE (92.35), indicating the poorest performance among the four models.
import matplotlib.pyplot as plt
csv_file = "center_points.csv"
center_points = pd.read_csv(csv_file)
X_train, X_test, y_train, y_test, train_indices, test_indices = train_test_split(
X, y, np.arange(len(X)), test_size=0.2, random_state=42)
# Training the best model (SVR with linear kernel) on the training dataset (X_train and y_train) for both x and y coordinates
best_model_x = SVR(kernel="linear")
best_model_x.fit(X_train, y_train[:, 0])
best_model_y = SVR(kernel="linear")
best_model_y.fit(X_train, y_train[:, 1])
# Loading the diffraction pattern images
image_folder = "C:/Users/iuruc/OneDrive/Desktop/GIT/project/labeled_images"
image_filenames = center_points["filename"]
images = [cv2.imread(os.path.join(image_folder, filename), cv2.IMREAD_GRAYSCALE) for filename in image_filenames]
num_images_to_plot = 5
def plot_image_with_centers(image, true_center, predicted_center):
    plt.imshow(image, cmap="gray")
    # Plotting true center in red
    plt.scatter([true_center[0]], [true_center[1]], c="r", marker="x", label="True Center")
    # Plotting predicted center in blue
    plt.scatter([predicted_center[0]], [predicted_center[1]], c="b", marker="o", label="Predicted Center")
    plt.legend()
    plt.show()

test_centers_df = center_points.loc[test_indices].sort_index()
for i in range(num_images_to_plot):
    image_index = test_indices[i]
    image = images[image_index]
    true_center = (test_centers_df.loc[image_index]['center_x'], test_centers_df.loc[image_index]['center_y'])
    predicted_center = (best_model_x.predict([X_test[i]])[0], best_model_y.predict([X_test[i]])[0])
    plot_image_with_centers(image, true_center, predicted_center)
This code initially defines a function called plot_image_with_centers, which is designed to display an image along with its actual and predicted centers. Next, it extracts the real center coordinates from the test_centers_df DataFrame using the test_indices and computes the predicted center coordinates using the optimal model. Lastly, the plot_image_with_centers function is invoked to exhibit the test images accompanied by both their true and estimated centers.
The outcome is gratifying, and the first objective has been successfully achieved.
In this section, we will implement a Convolutional Neural Network (CNN) model to predict the center coordinates of diffraction pattern images. CNNs are highly effective at processing grid-like data, such as images, and are capable of capturing local patterns and features within the input data. By utilizing multiple convolution and pooling layers, the model can learn hierarchical representations of the input images. Our CNN model will consist of several convolutional layers, followed by max pooling and a fully connected layer, before ultimately outputting the predicted x and y coordinates of the center. The model will be compiled using the Adam optimizer and mean squared error (MSE) as the loss function, while tracking mean absolute error (MAE) as a performance metric. The train-test split function will be modified to store the image indices, which will be useful for visualizing the results later on.
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
import matplotlib.pyplot as plt
# Loading the center_points.csv file
csv_file = "center_points.csv"
center_points = pd.read_csv(csv_file)
# Loading the diffraction pattern images
image_folder = "C:/Users/iuruc/OneDrive/Desktop/GIT/project/labeled_images"
image_filenames = center_points["filename"]
images = [cv2.imread(os.path.join(image_folder, filename), cv2.IMREAD_GRAYSCALE) for filename in image_filenames]
# Preprocessing images (resize, normalize, and flatten)
processed_images = [cv2.resize(img, (128, 128)) for img in images]
normalized_images = [img / 255.0 for img in processed_images]
flattened_images = [img.flatten() for img in normalized_images]
X = np.array(flattened_images)
y = center_points[["center_x", "center_y"]].values
# Modifying the train-test split to store the image indices
X_train, X_test, y_train, y_test, train_indices, test_indices = train_test_split(X, y, np.arange(len(X)), test_size=0.2, random_state=42)
X_train_reshaped = X_train.reshape(-1, 128, 128, 1)
X_test_reshaped = X_test.reshape(-1, 128, 128, 1)
model = Sequential([
Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
MaxPooling2D((2, 2)),
Conv2D(64, (3, 3), activation='relu'),
MaxPooling2D((2, 2)),
Conv2D(64, (3, 3), activation='relu'),
Flatten(),
Dense(64, activation='relu'),
Dropout(0.5),
Dense(2) # 2 output neurons, one for each coordinate (x, y)
])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
history = model.fit(X_train_reshaped, y_train, epochs=50, batch_size=32, validation_split=0.2)
Epoch 1/50 - loss: 513494.9375 - mae: 665.0554 - val_loss: 309036.9688 - val_mae: 361.4373
Epoch 2/50 - loss: 157001.0938 - mae: 303.9953 - val_loss: 36274.9023 - val_mae: 148.9750
Epoch 3/50 - loss: 68881.1719 - mae: 207.2490 - val_loss: 12224.4043 - val_mae: 85.0395
...
Epoch 10/50 - loss: 49445.0898 - mae: 178.6366 - val_loss: 8494.2324 - val_mae: 70.2333
...
Epoch 20/50 - loss: 42817.4922 - mae: 165.0264 - val_loss: 20839.7832 - val_mae: 121.6025
...
Epoch 30/50 - loss: 38856.2148 - mae: 152.0454 - val_loss: 8125.0220 - val_mae: 71.3468
...
Epoch 40/50 - loss: 33745.6953 - mae: 142.2043 - val_loss: 8332.7705 - val_mae: 72.3715
...
Epoch 50/50 - loss: 33527.7070 - mae: 144.4420 - val_loss: 5180.4526 - val_mae: 55.0343
test_loss, test_mae = model.evaluate(X_test_reshaped, y_test)
print(f"Test Mean Absolute Error: {test_mae}")
5/5 [==============================] - 0s 39ms/step - loss: 5313.1445 - mae: 56.8958
Test Mean Absolute Error: 56.89584732055664
This code defines a simple CNN architecture with three convolutional layers, followed by max-pooling layers, a fully connected layer, and dropout for regularization. The model is trained using the mean squared error (MSE) loss function and the Adam optimizer. It appears that the current CNN model's performance on the test set is not optimal, with a mean absolute error (MAE) of approximately 56.89. This could be due to a variety of factors, such as the model architecture, hyperparameters, or the amount of training data available.
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.callbacks import EarlyStopping
# Defining a learning rate scheduler
def scheduler(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return lr * 0.1
# Defining the CNN model
model = Sequential([
Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(128, 128, 1)),
MaxPooling2D(pool_size=(2, 2)),
Conv2D(64, kernel_size=(3, 3), activation='relu'),
MaxPooling2D(pool_size=(2, 2)),
Conv2D(128, kernel_size=(3, 3), activation='relu'),
MaxPooling2D(pool_size=(2, 2)),
Flatten(),
Dense(256, activation='relu'),
Dropout(0.5),
Dense(128, activation='relu'),
Dropout(0.5),
Dense(2)
])
# Compiling the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
# Defining early stopping
early_stopping = EarlyStopping(
monitor='val_loss',
patience=5, # Number of epochs to wait before stopping training when no improvement is observed
restore_best_weights=True
)
# Training the model with early stopping
history = model.fit(
X_train.reshape(-1, 128, 128, 1),
y_train,
batch_size=32,
epochs=20,
validation_split=0.2,
callbacks=[LearningRateScheduler(scheduler), early_stopping]
)
# Evaluating the model on the test set
test_loss, test_mae = model.evaluate(X_test.reshape(-1, 128, 128, 1), y_test)
print(f"Test Mean Absolute Error: {test_mae}")
By creating a graph model for point identification in electron diffraction patterns and improving simulated patterns to resemble real data, we have made progress toward the two main goals of this project. However, the project is not finished, and further adjustments will be needed as we proceed.
The graph model we created for Aim 1 can detect the center of electron diffraction patterns and trim the images to a constant 512x512 pixel resolution. For quick analysis of electron diffraction patterns and accurate point identification, this is a crucial step. In order to increase the accuracy of our model, we have used a variety of data science techniques, including machine learning algorithms like convolutional neural networks. In order to guarantee the model performs as efficiently as possible, we will continue to optimize it by varying its hyperparameters and trying out other approaches.
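As one concrete example of what that tuning could look like, the sketch below uses scikit-learn's GridSearchCV to search over SVR hyperparameters for the x-coordinate model. The parameter grid is an illustrative assumption rather than a validated configuration, and the snippet reuses the X_train and y_train arrays defined earlier.

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

# Hypothetical grid; the values below are placeholders, not tuned settings
param_grid = {'C': [0.1, 1, 10], 'epsilon': [0.01, 0.1, 1.0]}
search_x = GridSearchCV(SVR(kernel='linear'), param_grid,
                        scoring='neg_root_mean_squared_error', cv=3)
search_x.fit(X_train, y_train[:, 0])
print('Best parameters for the x-coordinate model:', search_x.best_params_)
print('Best cross-validated RMSE (x):', -search_x.best_score_)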
In order to better mimic real data, we concentrated on improving the simulations of electron diffraction patterns for Aim 2. The simulated patterns have been given artificial noise and blur filters to help them resemble actual data more closely. In the end, this will make it possible for us to more precisely test image processing methods.
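Since the noise- and blur-injection code itself is not shown in this section, the following is only a minimal sketch of how such a degradation step could be applied to a simulated pattern with NumPy and OpenCV; the noise level and kernel size are assumed placeholder values, not the settings used for the actual dataset.

import cv2
import numpy as np

def degrade_simulated_pattern(image, noise_sigma=10.0, blur_kernel=(5, 5)):
    # Add zero-mean Gaussian noise, then blur slightly so the diffraction
    # spots lose their perfectly sharp simulated edges
    noisy = image.astype(np.float32) + np.random.normal(0, noise_sigma, image.shape)
    noisy = np.clip(noisy, 0, 255).astype(np.uint8)
    return cv2.GaussianBlur(noisy, blur_kernel, 0)

degraded = degrade_simulated_pattern(simulated_image)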
For electron diffraction patterns, we can use additional data augmentation techniques such as rotation and flipping to further improve the performance of our models. Rotating each image at different angles expands the dataset and enhances the model's ability to recognize patterns in various orientations, while flipping the images horizontally or vertically replicates the appearance of patterns viewed from different directions. However, the larger dataset and the transformations applied make working with augmented data more computationally intensive. To handle this, we will need to invest in more powerful hardware or use cloud-based computing resources, which will allow us to process the augmented data efficiently and train more sophisticated machine learning models. A minimal sketch of such an augmentation step is shown below.
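The helper and the chosen angles below are illustrative assumptions only. Note that whenever an image is rotated or flipped, the labeled center coordinates would have to be transformed in the same way.

import cv2

def augment_pattern(image, angles=(90, 180, 270)):
    # Generate flipped and rotated copies of a diffraction pattern
    augmented = [image, cv2.flip(image, 0), cv2.flip(image, 1)]  # original + vertical/horizontal flips
    h, w = image.shape[:2]
    for angle in angles:
        matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1.0)
        augmented.append(cv2.warpAffine(image, matrix, (w, h)))
    return augmented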
In conclusion, we have made modest progress towards our two primary objectives in this project: developing a graph model for point identification in electron diffraction patterns and enhancing simulated patterns to more closely resemble real data. Nevertheless, there is still much to be done, and as we proceed, further refinements and adjustments are necessary.