Skip to content

Commit

Permalink
Fix txt repeating text removal
Browse files Browse the repository at this point in the history
  • Loading branch information
mantas-done committed Apr 16, 2024
1 parent fbd3858 commit 494cafb
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 1 deletion.
43 changes: 42 additions & 1 deletion src/Code/Converters/TxtConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,10 @@ public function fileContentToInternalFormat($file_content, $original_file_conten
}
unset($row);

return self::fillStartAndEndTimes($internal_format);
$internal_format = self::fillStartAndEndTimes($internal_format);
$internal_format = self::removeRepeatingTextStarts($internal_format);

return $internal_format;
}

// start and end timestamp
Expand Down Expand Up @@ -518,6 +521,44 @@ private static function twoLinesSeparatedByEmptyLine(string $file_content)
return self::fillStartAndEndTimes($internal_format);
}

/**
 * Removes the text that the first line of every subtitle starts with.
 *
 * The longest prefix shared by the first line of all subtitles is computed and cut
 * from each of them. Inputs with two or fewer subtitles are returned unchanged.
 *
 * The comparison is done byte by byte, so the shared prefix is trimmed back to a
 * UTF-8 character boundary before cutting. Without that, first lines that start with
 * different multibyte characters sharing their leading byte(s) (e.g. "ą" and "ć")
 * would each be left with an orphaned continuation byte.
 *
 * NOTE: if one first line is itself a prefix of all the others, it ends up empty.
 *
 * @param array $internal_format list of subtitles; each entry has a 'lines' array of strings
 * @return array the subtitles with the shared prefix removed from each $subtitle['lines'][0]
 */
public static function removeRepeatingTextStarts($internal_format)
{
    if (count($internal_format) <= 2) {
        return $internal_format; // don't try to filter if there are almost no lines
    }

    $first_lines = [];
    foreach ($internal_format as $subtitle) {
        $first_lines[] = $subtitle['lines'][0];
    }

    // Length (in bytes) of the longest prefix shared by all first lines.
    $reference = $first_lines[0];
    $reference_length = strlen($reference);
    $repeating_length = 0;
    for ($i = 0; $i < $reference_length; $i++) {
        foreach ($first_lines as $line) {
            if (!isset($line[$i]) || $line[$i] !== $reference[$i]) {
                break 2;
            }
        }
        $repeating_length = $i + 1;
    }

    // Do not cut through a multibyte UTF-8 character: locate the lead byte of the last
    // character in the prefix and drop that character if its sequence is incomplete.
    for ($back = 1; $back <= 4 && $back <= $repeating_length; $back++) {
        $byte = ord($reference[$repeating_length - $back]);
        if (($byte & 0xC0) === 0x80) {
            continue; // continuation byte, the lead byte is further back
        }
        if ($byte >= 0xC0) {
            // lead byte: 110xxxxx => 2 bytes, 1110xxxx => 3 bytes, 11110xxx => 4 bytes
            $expected = $byte >= 0xF0 ? 4 : ($byte >= 0xE0 ? 3 : 2);
            if ($back < $expected) {
                $repeating_length -= $back;
            }
        }
        break; // ASCII byte or lead byte reached, nothing more to inspect
    }

    if ($repeating_length === 0) {
        return $internal_format; // nothing in common, leave the lines untouched
    }

    foreach ($internal_format as &$subtitle) {
        $subtitle['lines'][0] = substr($subtitle['lines'][0], $repeating_length);
    }
    unset($subtitle);

    return $internal_format;
}

private static function hasTime($line)
{
return preg_match(self::$time_regexp, $line) === 1;
Expand Down
15 changes: 15 additions & 0 deletions tests/formats/TxtTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,21 @@ public function testDoesNotRemoveNotHtmlTag()
$this->assertInternalFormatsEqual($expected, $actual);
}

/**
 * Each input row is a timestamp followed by ':' and a single letter; the loaded
 * subtitles are expected to contain only that letter, one second apart.
 */
public function testRemoveRepeatingTextFromBeginningOfText()
{
    $input = '
00:00:00:a
00:00:01:b
00:00:02:c
';

    // Build the expected cues: letter N starts at second N and ends at second N + 1.
    $builder = new Subtitles();
    foreach (['a', 'b', 'c'] as $second => $text) {
        $builder = $builder->add($second, $second + 1, $text);
    }
    $expected = $builder->getInternalFormat();

    $actual = Subtitles::loadFromString($input)->getInternalFormat();

    $this->assertInternalFormatsEqual($expected, $actual);
}

// ---------------------------------- private ----------------------------------------------------------------------

private static function generatedSubtitles()
Expand Down

0 comments on commit 494cafb

Please sign in to comment.