Skip to content

Commit

Permalink
Server logic updated
Browse files Browse the repository at this point in the history
  • Loading branch information
aiaragomes committed Aug 28, 2023
1 parent 9037b40 commit d795c82
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 58 deletions.
3 changes: 1 addition & 2 deletions federated_learning_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,7 @@ async def calculate_average(self) -> None:
self._received_models = 0
logger.info(f"Calculating average of {files} into {output}")

# TODO: change averaging
helper_server.average_weights(output, full_output, files)
self._params["change"] = helper_server.average_weights(output, full_output, files, self._params["k"])

# Stop when maximum number of iterations or convergence criterion is achieved
if (self._current_round > self._params["max_iter"]) or (self._params["change"] <= self._params["epsilon"]):
Expand Down
85 changes: 29 additions & 56 deletions helper_server.py
Original file line number Diff line number Diff line change
@@ -1,86 +1,54 @@
from typing import Optional, Dict, Any, List

import json

import numpy as np
import pandas as pd

from scipy.spatial import distance
from sklearn.cluster import KMeans
from pandas.api.types import is_string_dtype
from typing import Dict, Any, List


def initialize(model_centroids: str, **kwargs) -> Dict[str, Any]:
global k
global centroids
global epsilon
global max_iter
global columns
global change

# Centroids
centroids_input = kwargs.get("centroids", None)
if centroids_input is None:
# These are the initial centroids if none are given, it only makes
# sense for the TNM data of the PoC
centroids = [[1, 0, 0], [2, 1, 0], [3, 2, 0], [5, 3, 1]]
kwargs["centroids"] = centroids
else:
centroids = centroids_input
kwargs["centroids"] = [[1, 0, 0], [2, 1, 0], [3, 2, 0], [5, 3, 1]]

# Number of clusters k
k_input = kwargs.get("k", None)
if k_input is None:
k = 4
kwargs["k"] = k
else:
k = k_input
kwargs["k"] = 4

# Convergence criterion epsilon
epsilon_input = kwargs.get("epsilon", None)
if epsilon_input is None:
epsilon = 0.01
kwargs["epsilon"] = epsilon
else:
epsilon = epsilon_input
kwargs["epsilon"] = 0.01

# Maximum number of iterations
max_iter_input = kwargs.get("max_iter", None)
if max_iter_input is None:
max_iter = 50
kwargs["max_iter"] = max_iter
else:
max_iter = max_iter_input
kwargs["max_iter"] = 50

# Columns to use for clustering
columns_input = kwargs.get("columns", None)
if columns_input is None:
# TODO: use all columns from input file when none are given
columns = ['t', 'n', 'm']
kwargs["columns"] = columns
else:
columns = columns_input
kwargs["columns"] = ['t', 'n', 'm']

# Change in centroids, initialize as something bigger than epsilon
change = 2*epsilon
change = 2*kwargs["epsilon"]
kwargs["change"] = change

# TODO: in the original code model_weigths seem to be saved, do we need
# to save the centroids, a common_model file is given as input here,
# but I'm unsure how it is used
return kwargs
# Save initial centroids
print("Saving initial centroids")
with open(model_centroids, "w+") as f:
json.dump(kwargs["centroids"], f)


def get_weights(file: str):
model = json.load(open(file))['centroids']
return model
return kwargs


def average_weights(model_weights: str, full_model: str, files: List[str]) -> None:
# TODO: make sure things are properly saved here
print(f"Averaging {files} into {model_weights}")
results = [get_weights(file) for file in files]
print("Loaded weights")
def average_weights(model_centroids: str, full_model: str, files: List[str], k: int) -> None:
print(f"Averaging {files} into {model_centroids}")
results = [json.load(open(file))['centroids'] for file in files]
print("Loaded centroids")

# Organise local centroids into a matrix
local_centroids = []
Expand All @@ -91,24 +59,29 @@ def average_weights(model_weights: str, full_model: str, files: List[str]) -> No

# Average centroids by running kmeans on local results
print('Run global averaging for centroids')
# TODO: get k from input params
k = 4
kmeans = KMeans(n_clusters=k, random_state=0).fit(X)
new_centroids = kmeans.cluster_centers_

# Read previous centroids
centroids = json.load(open(model_centroids))['centroids']

# Compute the sum of the magnitudes of the centroids differences
# between steps. This change in centroids between steps will be used
# to evaluate convergence.
print('Compute change in cluster centroids')
change = 0
for i in range(k):
diff = new_centroids[i] - np.array(model_weights[i])
diff = new_centroids[i] - np.array(centroids[i])
change += np.linalg.norm(diff)

# Re-define the centroids
model_weights = list(list(centre) for centre in new_centroids)
centroids = {'centroids': list(list(centre) for centre in new_centroids)}

# Save results
print("Saving average centroids")
with open(model_centroids, "w+") as f:
json.dump(centroids, f)
with open(full_model, "w+") as f:
json.dump(centroids, f)

# TODO: somehow save the result
# print("Saving average weights")
# model.save_weights(model_weights)
# model.save(full_model)
return change

0 comments on commit d795c82

Please sign in to comment.