Skip to content

Commit

Permalink
Add support for multiline text after the timestamp
Browse files: browse the repository at this point in the history
  • Loading branch information
mantas-done committed Aug 1, 2023
1 parent f61fbc6 commit c994b2f
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/Code/Converters/AssConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class AssConverter implements ConverterContract
{
public function canParseFileContent($file_content)
{
return preg_match('/^\[Script Info\]\R/m', $file_content) === 1;
return preg_match('/\[Script Info\]\R/m', $file_content) === 1;
}

public function fileContentToInternalFormat($file_content)
Expand Down
15 changes: 13 additions & 2 deletions src/Code/Converters/CsvConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,25 @@ public function fileContentToInternalFormat($file_content)
$data = self::csvToArray($file_content);
$data_string = '';

$is_start_time = (bool) preg_match(TxtConverter::$time_regexp, $data[1][0]);
$is_end_time = (bool) preg_match(TxtConverter::$time_regexp, $data[1][1]);

foreach ($data as $k => $row) {
$timestamp_found = (bool) preg_match(TxtConverter::$time_regexp, $row[0]);
if ($k === 0 && $timestamp_found === false) { // heading
continue;
}

$row_string = implode(' ', $row);
$data_string .= $row_string . "\n";
// format csv file as a txt file, so TxtConverter would be able to understand it
if ($is_start_time && $is_end_time) {
$data_string .= $row[0] . ' ' . $row[1] . "\n"; // start end
$data_string .= $row[2] . "\n"; // text
} elseif ($is_start_time) {
$data_string .= $row[0] . "\n"; // start
$data_string .= $row[1] . "\n"; // text
} else {
$data_string .= $row[0] . "\n"; // text
}
}
return (new TxtConverter)->fileContentToInternalFormat($data_string);
}
Expand Down
14 changes: 8 additions & 6 deletions src/Code/Converters/TxtConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,14 @@ public function fileContentToInternalFormat($file_content)
$internal_format = [];
$j = 0;
foreach ($data as $k => $row) {
if (
isset($data[$k - 1]['start'])
&& ($data[$k - 1]['start'] === $row['start'] || $row['start'] === null)
) {
$internal_format[$j - 1]['lines'][] = $row['text'];
continue;
for ($i = 1; $i <= 10; $i++) { // up to 10 lines
if (
isset($data[$k - $i]['start'])
&& ($data[$k - $i]['start'] === $row['start'] || $row['start'] === null)
) {
$internal_format[$j - $i]['lines'][] = $row['text'];
continue 2;
}
}

$internal_format[$j] = [
Expand Down
6 changes: 3 additions & 3 deletions src/Subtitles.php
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ public static function loadFromString($string, $format = null)
continue;
}
$diff = $internal_format[$k - 1]['end'] - $row['start'];
if ($diff < 1 && $diff > 0) {
if ($diff < 10 && $diff > 0) {
$internal_format[$k - 1]['end'] = $row['start'];
}
}
Expand All @@ -239,11 +239,11 @@ public static function loadFromString($string, $format = null)
$last_end_time = 0;
foreach ($internal_format as $row) {
if ($row['start'] < $last_end_time) {
throw new UserException('Times are overlapping near text: ' . SrtConverter::internalTimeToSrt($row['start']) . ' ' . $row['lines'][0]);
throw new UserException('Times are overlapping over 10 seconds near the text: ' . SrtConverter::internalTimeToSrt($row['start']) . ' ' . $row['lines'][0]);
}
$last_end_time = $row['end'];
if ($row['start'] > $row['end']) {
throw new UserException('Times are overlapping near text: ' . SrtConverter::internalTimeToSrt($row['start']) . ' ' . $row['lines'][0]);
throw new UserException('Start time is bigger than the end time near text: ' . SrtConverter::internalTimeToSrt($row['start']) . ' ' . $row['lines'][0]);
}
}

Expand Down
15 changes: 15 additions & 0 deletions tests/formats/CsvTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,19 @@ public function testParseFileWithSingleTimestamp()

$this->assertInternalFormatsEqual($expected_internal_format, $actual_internal_format);
}

/**
 * A CSV whose rows carry an extra fourth column ("Layer ID") after the
 * start, end and text columns must still load as two subtitles: the
 * expected result holds only the start time, end time and text of each row.
 */
public function testAdditionalColumns()
{
    // Header row followed by two data rows; the first text cell is quoted, the second is not.
    $csv = <<<'CSV'
Start Time,End Time,Text,Layer ID
00:00:08:00,00:00:13:00,"abc",1
00:00:20:00,00:00:24:00,def,1
CSV;

    $parsed = Subtitles::loadFromString($csv)->getInternalFormat();

    // Reference result built by hand: 8-13 "abc" and 20-24 "def".
    $wanted = (new Subtitles())
        ->add(8, 13, ['abc'])
        ->add(20, 24, ['def'])
        ->getInternalFormat();

    $this->assertInternalFormatsEqual($wanted, $parsed);
}
}
12 changes: 8 additions & 4 deletions tests/formats/TxtTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -265,12 +265,16 @@ public function testFirstLineWithTimestampSecondAfter()
$actual = Subtitles::loadFromString('
01:01 a
b
01:02 c
c
d
01:02 e
f
01:03 g
')->getInternalFormat();
// var_dump($actual); exit;
$expected = (new Subtitles())
->add(61, 62, ['a', 'b'])
->add(62, 63, 'c')
->add(61, 62, ['a', 'b', 'c', 'd'])
->add(62, 63, ['e', 'f'])
->add(63, 64, 'g')
->getInternalFormat();
$this->assertInternalFormatsEqual($expected, $actual);
}
Expand Down

0 comments on commit c994b2f

Please sign in to comment.