Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
travisgk authored Sep 20, 2024
1 parent 437e9b1 commit c819808
Show file tree
Hide file tree
Showing 4 changed files with 226 additions and 19 deletions.
12 changes: 7 additions & 5 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,14 @@
</head>
<body>
<h1>Long S Converter</h1>
<p>This tool converts text to use the archaic long S letter ( ſ ) in its spellings.</p>
<!-- loads scripts traditionally, no modules required. -->
<script src="js-long-s/splitstring.js"></script>
<script src="js-long-s/romance.js"></script>
<script src="script.js"></script>

<p>This tool converts text to use the archaic long S letter (ſ) in its spellings.</p>

<p>English, Spanish, French, Italian, and German are supported.</p>
<p>English, Spanish, French, and Italian are supported.</p>

<br>

Expand All @@ -24,7 +29,6 @@ <h1>Long S Converter</h1>
<option value="es">Español</option>
<option value="fr">Français</option>
<option value="it">Italiano</option>
<option value="de">Deutsch</option>
</select>

<!-- Button to trigger conversion -->
Expand All @@ -33,7 +37,5 @@ <h1>Long S Converter</h1>

<!-- Paragraph to display the output text -->
<p id="outputText"></p>

<script src="script.js"></script>
</body>
</html>
120 changes: 120 additions & 0 deletions js-long-s/romance.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
js-long-s/romance.js
by TravisGK
---
this file contains the functions necessary to convert words in Romance languages
so that they use the archaic letter called the long S (ſ).
although English is a Germanic language, its rules are regular enough to be
handled by code already in this file, so its conversion function is here.
*/

/**
 * Strips diacritics from a string.
 *
 * The string is canonically decomposed (NFD), which splits each accented
 * letter into a base letter followed by combining marks, and every code unit
 * in the U+0300–U+036F range is then deleted.
 *
 * @param {string} str - text to strip.
 * @returns {string} the text with those combining marks removed.
 */
function removeAccents(str) {
  const decomposed = str.normalize("NFD");
  const withoutMarks = decomposed.replace(/[\u0300-\u036f]/g, "");
  return withoutMarks;
}

/**
 * Returns `text` with the letters selected by `mainPattern` replaced by the
 * long s (ſ).
 *
 * Matching is done on two index-aligned views of the text:
 *  - a "plain" view with accents stripped, so that letter classes such as
 *    [a-z] in `mainPattern` also see accented letters, and
 *  - an "accented" view with accents kept, so that an `exclusionPattern` that
 *    lists accented vowels can actually match.
 * Each position of those views is mapped back to its position in `text`, so
 * input that contains combining marks is edited at the right place and the
 * result always has the same length as `text`.
 *
 * @param {string} text - the word to convert.
 * @param {RegExp} mainPattern - matches start where a long s should be placed.
 * @param {?RegExp} [exclusionPattern=null] - matches start where the round s
 *     must be kept; a position is excluded when this pattern matches there in
 *     either view.
 * @param {boolean} [ssiUsesDoubleSS=true] - when false, every "ſſi" in the
 *     result is rewritten as "ſsi".
 * @returns {string} the converted text.
 */
function romanceConversion(
  text,
  mainPattern,
  exclusionPattern = null,
  ssiUsesDoubleSS = true,
) {
  const COMBINING_MARK = /[\u0300-\u036f]/;

  // returns the start index of every match of `pattern` in `subject`.
  // a fresh global copy of the pattern is scanned, so the caller's regex is
  // never left with a stale lastIndex, and a non-global or empty-matching
  // pattern cannot make the loop run forever.
  const findMatchIndices = (pattern, subject) => {
    const flags = pattern.flags.includes("g")
      ? pattern.flags
      : pattern.flags + "g";
    const scanner = new RegExp(pattern.source, flags);
    const found = [];
    let match;
    while ((match = scanner.exec(subject)) !== null) {
      found.push(match.index);
      if (match[0].length === 0) {
        scanner.lastIndex++;
      }
    }
    return found;
  };

  // builds the two views cluster by cluster, where a cluster is one character
  // plus any combining marks that follow it.
  let plainView = "";
  let accentedView = "";
  const originOf = []; // originOf[k] is the index in `text` for view index k.
  let start = 0;
  while (start < text.length) {
    let end = start + (text.codePointAt(start) > 0xffff ? 2 : 1);
    while (end < text.length && COMBINING_MARK.test(text[end])) {
      end++;
    }
    const cluster = text.substring(start, end);
    const plain = removeAccents(cluster);
    const composed = cluster.normalize("NFC");

    plainView += plain;
    // falls back to the plain form when composing would break the alignment.
    accentedView += composed.length === plain.length ? composed : plain;
    for (let k = 0; k < plain.length; k++) {
      originOf.push(start);
    }
    start = end;
  }

  // collects the positions at which the round s must be kept.
  const excluded = new Set();
  if (exclusionPattern !== null) {
    for (const view of [plainView, accentedView]) {
      for (const i of findMatchIndices(exclusionPattern, view)) {
        excluded.add(i);
      }
    }
  }

  // swaps in the long s at every remaining match.
  const chars = text.split("");
  for (const i of findMatchIndices(mainPattern, plainView)) {
    if (!excluded.has(i)) {
      chars[originOf[i]] = "ſ";
    }
  }
  const converted = chars.join("");

  if (ssiUsesDoubleSS) {
    return converted;
  }
  return converted.replace(/ſſi/g, "ſsi");
}

/**
 * Converts an English word so that it uses the long s (ſ).
 *
 * @param {string} text - the word to convert.
 * @returns {[string, boolean, boolean]} the converted text, a flag saying a
 *     replacement was made (always true here), and a flag saying the caller
 *     should apply the result with in-place replacement (always true here).
 */
function englishConversion(text) {
  const ROUND_S_BEFORE_BK = false; // true in 17th and early 18th century.

  // the long s goes before any letter; in the older style the round s is
  // kept before b and k only, so every other letter (including l) is listed.
  const mainPattern = ROUND_S_BEFORE_BK
    ? /s(?=[ac-jl-zAC-JL-Z])/g
    : /s(?=[a-zA-Z])/g;

  // the round s is kept directly before and directly after an f.
  // the match must start ON the s that stays round, because the exclusion is
  // applied by match index: "successful" excludes the s right before the f.
  const exclusionPattern = /s(?=f|F)|(?<=f|F)s/g;
  let converted = romanceConversion(text, mainPattern, exclusionPattern);

  // three long s in a row are written with a round s in the middle.
  converted = converted.replace(/ſſſ/g, "ſsſ");

  return [
    converted, // converted text.
    true, // if any replacement is made.
    true, // if any fancy in-place replacements are needed
  ];
}

/**
 * Converts a Spanish word so that it uses the long s (ſ).
 *
 * @param {string} text - the word to convert.
 * @returns {[string, boolean, boolean]} the converted text, a flag saying a
 *     replacement was made (always true here), and a flag saying the caller
 *     should apply the result with in-place replacement (always true here).
 */
function spanishConversion(text) {
  const USE_LONG_S_BEFORE_ACCENTED_O = false;

  // the long s goes before any letter except b, f and h,
  // and before a hyphen or an em dash.
  const mainPattern = /s(?=[ac-egi-zAC-EGI-Z\-—])/g;

  // the round s is kept before these accented vowels. both cases are built
  // from the same lists so that no vowel can be missing from only one of them.
  const ACCENTED_VOWELS = "áàéèíìúùüÁÀÉÈÍÌÚÙÜ";
  const ACCENTED_O = "óòÓÒ";
  const roundBefore = USE_LONG_S_BEFORE_ACCENTED_O
    ? ACCENTED_VOWELS
    : ACCENTED_VOWELS + ACCENTED_O;
  const exclusionPattern = new RegExp(`s(?=[${roundBefore}])`, "g");

  // false: "ſſi" is written "ſsi".
  const converted = romanceConversion(
    text, mainPattern, exclusionPattern, false
  );

  return [
    converted, // converted text.
    true, // if any replacement is made.
    true, // if any fancy in-place replacements are needed
  ];
}

/**
 * Converts a French word so that it uses the long s (ſ).
 *
 * @param {string} text - the word to convert.
 * @returns {[string, boolean, boolean]} the converted text, a flag saying a
 *     replacement was made (always true here), and a flag saying the caller
 *     should apply the result with in-place replacement (always true here).
 */
function frenchConversion(text) {
  // the long s goes before any letter except b, f and h; nothing is excluded.
  const longSSites = /s(?=[a-ac-eg-gi-zA-AC-EG-GI-Z])/g;
  const converted = romanceConversion(text, longSSites, null);

  const replacementMade = true;
  const needsInPlaceReplace = true;
  return [converted, replacementMade, needsInPlaceReplace];
}

/**
 * Converts an Italian word so that it uses the long s (ſ).
 *
 * @param {string} text - the word to convert.
 * @returns {[string, boolean, boolean]} the converted text, a flag saying a
 *     replacement was made (always true here), and a flag saying the caller
 *     should apply the result with in-place replacement (always true here).
 */
function italianConversion(text) {
  const USE_DOUBLE_LONG_WITH_SSI = true;

  // the long s goes before any letter except b and f,
  // and before a hyphen or an em dash.
  const mainPattern = /s(?=[ac-eg-zAC-EG-Z\-—])/g;

  // the round s is kept before an accented vowel. lowercase ù is listed
  // alongside uppercase Ù so that words such as "Gesù" are covered.
  const exclusionPattern = /s(?=[áàéèíìóòúùüÁÀÉÈÍÌÓÒÚÙÜ])/g;

  const converted = romanceConversion(
    text, mainPattern, exclusionPattern, USE_DOUBLE_LONG_WITH_SSI
  );

  return [
    converted, // converted text.
    true, // if any replacement is made.
    true, // if any fancy in-place replacements are needed
  ];
}
56 changes: 56 additions & 0 deletions js-long-s/splitstring.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/**
 * Returns true if the given character is a letter.
 *
 * Only the first code point of `char` is examined. It is tested against the
 * Unicode "Letter" category, so accented letters and letters outside Latin-1
 * (œ, Ÿ, ſ, ...) are accepted, while signs such as × and ÷ are rejected.
 *
 * @param {string} char - the character to test.
 * @returns {boolean} true for a letter; false otherwise, including for an
 *     empty string or a non-string value.
 */
function isLetter(char) {
  if (typeof char !== "string" || char.length === 0) {
    return false;
  }
  const first = String.fromCodePoint(char.codePointAt(0));
  return /^\p{L}$/u.test(first);
}

/**
 * Splits a string into words and reports where each word starts.
 *
 * The input is cut at whitespace; each piece is then trimmed so that it
 * starts at its first letter and ends at its last letter. Pieces that contain
 * no letter at all (numbers, lone punctuation) are skipped.
 *
 * @param {string} inputString - the text to split.
 * @param {string} lang - language code; for "de" a word may also end on an
 *     apostrophe instead of a letter.
 * @returns {Array<[number, string]>} one [index, word] pair per word, where
 *     index is the position of the word's first letter in `inputString`.
 */
function splitStringWithIndices(inputString, lang) {
  const APOSTROPHES = "'";
  const results = [];
  const tokenPattern = /\S+/g; // matches non-whitespace sequences.

  // decides which characters may close a word.
  const endsWord = lang === "de"
    ? (ch) => isLetter(ch) || ch === APOSTROPHES
    : (ch) => isLetter(ch);

  let match;
  while ((match = tokenPattern.exec(inputString)) !== null) {
    const token = match[0];

    // finds the first letter of the token.
    let first = 0;
    while (first < token.length && !isLetter(token[first])) {
      first++;
    }
    if (first === token.length) continue; // no letters found.

    // finds the last closing character; token[first] is a letter,
    // so the scan never needs to go below it.
    let last = token.length - 1;
    while (last > first && !endsWord(token[last])) {
      last--;
    }

    results.push([match.index + first, token.substring(first, last + 1)]);
  }

  return results;
}
57 changes: 43 additions & 14 deletions script.js
Original file line number Diff line number Diff line change
@@ -1,26 +1,55 @@
/**
 * Reads the text and language chosen on the page, converts every word to use
 * the long s (ſ), and writes the result into the #outputText element.
 *
 * Words are located with splitStringWithIndices and converted one at a time
 * by the conversion function for the selected language. For an unsupported
 * language code the input text is written out unchanged.
 */
function convertText() {
  const keepUnknownS = false;

  const inputText = document.getElementById("inputText").value;
  const langOption = document.getElementById("langOption").value;

  // picks the conversion function for the selected language.
  let convertFunc = null;
  switch (langOption) {
    case "en":
      convertFunc = englishConversion;
      break;
    case "es":
      convertFunc = spanishConversion;
      break;
    case "fr":
      convertFunc = frenchConversion;
      break;
    case "it":
      convertFunc = italianConversion;
      break;
    default:
      break;
  }

  let outputText = inputText;

  if (convertFunc !== null) {
    const words = splitStringWithIndices(inputText, langOption);
    for (const [i, oldWord] of words) {
      let [newWord, replacementMade, useFancyReplace] = convertFunc(oldWord);

      if (!replacementMade) {
        continue;
      }

      if (!useFancyReplace) {
        // overwrites the word's span with the converted word.
        outputText =
          outputText.substring(0, i) +
          newWord +
          outputText.substring(i + newWord.length);
        continue;
      }

      if (!keepUnknownS) {
        newWord = newWord.replace(/X/g, "ſ"); // replaces UNKNOWN_S with long s
      }

      // in-place replacement: only positions that hold an "s" in the output
      // are touched, so accents and letter case of the input are preserved.
      for (let j = 0; j < newWord.length; j++) {
        if (outputText[i + j] === "s") {
          outputText =
            outputText.substring(0, i + j) +
            newWord[j] +
            outputText.substring(i + j + 1);
        }
      }
    }
  }

  document.getElementById("outputText").innerText = outputText;
}

0 comments on commit c819808

Please sign in to comment.