Skip to content

Commit

Permalink
typos and fix in separation model
Browse files: browse the repository at this point in the history
  • Loading branch information
genisplaja committed Oct 2, 2024
1 parent 1b2b7cf commit c848f7d
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 71 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ compiam/models/timbre/mridangam_stroke_classification/pre-computed_features.csv
compiam/models/structure/dhrupad_bandish_segmentation/annotations/section_boundaries_labels.csv
compiam/models/structure/dhrupad_bandish_segmentation/annotations/cycle_boundaries/
tests/resources/mir_datasets/*
compiam/models/separation/

# For next release
compiam/models/rhythm/4wayTabla/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def load_model(self, model_path):
if ".data-00000-of-00001" not in model_path:
path_to_check = model_path + ".data-00000-of-00001"
if not os.path.exists(path_to_check):
self.download_model(model_path) # Dowloading model weights
self.download_model(model_path) # Downloading model weights
self.model.load_weights(model_path).expect_partial()
self.model_path = model_path
self.trained = True
Expand Down
147 changes: 77 additions & 70 deletions compiam/separation/singing_voice_extraction/cold_diff_sep/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,80 +138,86 @@ def separate(
runs = math.floor(mixture.shape[0] / hopsized_chunk)
trim_low = 0
for trim in tqdm.tqdm(np.arange((runs * 2) - 1)):
trim_high = int(trim_low + (hopsized_chunk * 2))

# Get input mixture spectrogram
mix_trim = mixture[trim_low:trim_high]
mix_mag, mix_phase = compute_stft(mix_trim[None], self.unet_config)
new_len = next_power_of_2(mix_mag.shape[1])
mix_mag_trim = mix_mag[:, :new_len, :]
mix_phase_trim = mix_phase[:, :new_len, :]

# Get and stack cold diffusion steps
diff_feat = self.model(mix_mag_trim, mode="train")
diff_feat = tf.transpose(diff_feat, [1, 0, 2, 3])
diff_feat_t = tf.squeeze(
tf.reshape(
diff_feat, [1, 8, diff_feat.shape[-2] * diff_feat.shape[-1]]
),
axis=0,
).numpy()

# Normalize features, all energy curves having same range
normalized_feat = []
for j in np.arange(diff_feat_t.shape[1]):
normalized_curve = diff_feat_t[:, j] / (
np.max(np.abs(diff_feat_t[:, j])) + 1e-6
try:
trim_high = int(trim_low + (hopsized_chunk * 2))

# Get input mixture spectrogram
mix_trim = mixture[trim_low:trim_high]
mix_mag, mix_phase = compute_stft(mix_trim[None], self.unet_config)
new_len = next_power_of_2(mix_mag.shape[1])
mix_mag_trim = mix_mag[:, :new_len, :]
mix_phase_trim = mix_phase[:, :new_len, :]

# Get and stack cold diffusion steps
diff_feat = self.model(mix_mag_trim, mode="train")
diff_feat = tf.transpose(diff_feat, [1, 0, 2, 3])
diff_feat_t = tf.squeeze(
tf.reshape(
diff_feat, [1, 8, diff_feat.shape[-2] * diff_feat.shape[-1]]
),
axis=0,
).numpy()

# Normalize features, all energy curves having same range
normalized_feat = []
for j in np.arange(diff_feat_t.shape[1]):
normalized_curve = diff_feat_t[:, j] / (
np.max(np.abs(diff_feat_t[:, j])) + 1e-6
)
normalized_feat.append(normalized_curve)
normalized_feat = np.array(normalized_feat, dtype=np.float32)

# Compute mask using unsupervised clustering and reshape to magnitude spec shape
mask = get_mask(normalized_feat, clusters, scheduler)
mask = tf.convert_to_tensor(
mask, dtype=tf.float32
) # Move mask to tensor and cast to float
mask = tf.reshape(mask, mix_mag_trim.shape)

# Getting last step of computed features and applying mask
diff_feat_t = tf.reshape(diff_feat_t[-1, :], mix_mag_trim.shape)
output_signal = tf.math.multiply(diff_feat_t, mask)

# Silence unvoiced regions
output_signal = compute_signal_from_stft(
output_signal, mix_phase_trim, self.unet_config
)
normalized_feat.append(normalized_curve)
normalized_feat = np.array(normalized_feat, dtype=np.float32)

# Compute mask using unsupervised clustering and reshape to magnitude spec shape
mask = get_mask(normalized_feat, clusters, scheduler)
mask = tf.convert_to_tensor(
mask, dtype=tf.float32
) # Move mask to tensor and cast to float
mask = tf.reshape(mask, mix_mag_trim.shape)

# Getting last step of computed features and applying mask
diff_feat_t = tf.reshape(diff_feat_t[-1, :], mix_mag_trim.shape)
output_signal = tf.math.multiply(diff_feat_t, mask)

# Silence unvoiced regions
output_signal = compute_signal_from_stft(
output_signal, mix_phase_trim, self.unet_config
)
# From here on, pred_audio is numpy
pred_audio = tf.squeeze(output_signal, axis=0).numpy()
vad = VAD(
pred_audio,
sr=22050,
nFFT=512,
win_length=0.025,
hop_length=0.01,
theshold=0.99,
)
if np.sum(vad) / len(vad) < 0.25:
pred_audio = np.zeros(pred_audio.shape)

# Get boundary
boundary = None
boundary = "start" if trim == 0 else None
boundary = "end" if trim == runs - 2 else None

placehold_voc = np.zeros(output_voc.shape)
placehold_voc[
trim_low : trim_low + pred_audio.shape[0]
] = pred_audio * get_overlap_window(pred_audio, boundary=boundary)
output_voc += placehold_voc
trim_low += pred_audio.shape[0] // 2

output_voc = output_voc * (
# From here on, pred_audio is numpy
pred_audio = tf.squeeze(output_signal, axis=0).numpy()
vad = VAD(
pred_audio,
sr=22050,
nFFT=512,
win_length=0.025,
hop_length=0.01,
theshold=0.99,
)
if np.sum(vad) / len(vad) < 0.25:
pred_audio = np.zeros(pred_audio.shape)

# Get boundary
boundary = None
boundary = "start" if trim == 0 else None
boundary = "end" if trim == runs - 2 else None

placehold_voc = np.zeros(output_voc.shape)
placehold_voc[
trim_low : trim_low + pred_audio.shape[0]
] = pred_audio * get_overlap_window(pred_audio, boundary=boundary)
output_voc += placehold_voc
trim_low += pred_audio.shape[0] // 2

except:
output_voc = output_voc * (
np.max(np.abs(mixture.numpy())) / (np.max(np.abs(output_voc)) + 1e-6)
)
output_voc = output_voc[:trim_low]
return output_voc

return output_voc * (
np.max(np.abs(mixture.numpy())) / (np.max(np.abs(output_voc)) + 1e-6)
)

return output_voc

# TODO: write a function to store audio
# Building intuitive filename with model config
# filefolder = os.path.join(args.input_signal.split("/")[:-1])
Expand Down Expand Up @@ -243,3 +249,4 @@ def download_model(self, model_path=None):
# Delete zip file after extraction
os.remove(output)
logger.warning("Files downloaded and extracted successfully.")

0 comments on commit c848f7d

Please sign in to comment.