Add files via upload

travisgk · Sep 20, 2024 · c819808 · c819808
1 parent 437e9b1
commit c819808
Show file tree

Hide file tree

Showing 4 changed files with 226 additions and 19 deletions.
diff --git a/index.html b/index.html
@@ -7,9 +7,14 @@
 </head>
 <body>
     <h1>Long S Converter</h1>
-    <p>This tool converts text to use the archaic long S letter ( ſ ) in its spellings.</p>
+    <!-- loads scripts traditionally, no modules required. -->
+    <script src="js-long-s/splitstring.js"></script>
+    <script src="js-long-s/romance.js"></script>
+    <script src="script.js"></script>
+
+    <p>This tool converts text to use the archaic long S letter (ſ) in its spellings.</p>
 
-    <p>English, Spanish, French, Italian, and German are supported.</p>
+    <p>English, Spanish, French, and Italian are supported.</p>
 
     <br>
 
@@ -24,7 +29,6 @@ <h1>Long S Converter</h1>
         <option value="es">Español</option>
         <option value="fr">Français</option>
         <option value="it">Italiano</option>
-        <option value="de">Deutsch</option>
     </select>
 
     <!-- Button to trigger conversion -->
@@ -33,7 +37,5 @@ <h1>Long S Converter</h1>
 
     <!-- Paragraph to display the output text -->
     <p id="outputText"></p>
-
-    <script src="script.js"></script>
 </body>
 </html>
diff --git a/js-long-s/romance.js b/js-long-s/romance.js
@@ -0,0 +1,120 @@
+/*
+js-long-s/romance.js
+by TravisGK
+---
+this file contains the functions necessary to convert words in Romance languages
+so that they use the archaic letter called the long S (ſ).
+although English is a Germanic language, its rules are regular enough to be
+handled by code already in this file, so its conversion function is here.
+*/
+
/**
 * returns a copy of the given string with its diacritics removed.
 * the string is canonically decomposed (NFD) so that each accented letter
 * becomes a base letter followed by combining marks, and then every
 * combining mark in U+0300..U+036F is deleted.
 *
 * @param {string} str - the text to strip.
 * @returns {string} the text without those combining marks.
 */
function removeAccents(str) {
    const COMBINING_MARKS = /[\u0300-\u036f]/g;
    const decomposed = str.normalize("NFD");
    return decomposed.replace(COMBINING_MARKS, "");
}
+
/**
 * returns the given text with each selected round s replaced by the long s (ſ).
 *
 * `mainPattern` is matched against an accent-free copy of the text, so a
 * pattern written with plain letters also matches before accented letters.
 * that copy always has the same length as `text`, so every match index
 * points at the same character in the original text.
 *
 * `exclusionPattern` is matched against both the accent-free copy and the
 * original text. matching the original text is what lets an exclusion that
 * names accented letters (e.g. "s before í") actually take effect.
 *
 * neither pattern object is mutated; a pattern without the global flag is
 * still applied to every occurrence.
 *
 * @param {string} text - the word or text to convert.
 * @param {RegExp} mainPattern - matches every s that should become ſ.
 *     each match must start at the s itself.
 * @param {?RegExp} [exclusionPattern=null] - matches every s that must stay
 *     round even if `mainPattern` selects it. matches must start at the s.
 * @param {boolean} [ssiUsesDoubleSS=true] - if false, every "ſſi" in the
 *     result is rewritten as "ſsi".
 * @returns {string} the converted text.
 */
function romanceConversion(
    text,
    mainPattern,
    exclusionPattern=null,
    ssiUsesDoubleSS=true,
) {
    const COMBINING_MARKS = /[\u0300-\u036f]/g;

    // returns the starting index of every match of the pattern in str.
    // it scans with a private global copy so that the caller's regex state
    // is untouched and a non-global pattern cannot loop forever.
    const findMatchIndices = (pattern, str) => {
        const flags = pattern.global ? pattern.flags : pattern.flags + "g";
        const scanner = new RegExp(pattern.source, flags);
        const found = [];
        let match;
        while ((match = scanner.exec(str)) !== null) {
            found.push(match.index);
            if (match[0].length === 0) {
                scanner.lastIndex++; // steps past an empty match.
            }
        }
        return found;
    };

    // builds the accent-free copy one character at a time.
    // a character is only swapped for its stripped form when that form has
    // the same length, e.g. a stand-alone combining mark is kept as it is.
    let cleanText = "";
    for (const char of text) {
        if (char.charCodeAt(0) < 0x80) {
            cleanText += char; // ASCII never carries an accent.
            continue;
        }
        const stripped = char.normalize("NFD").replace(COMBINING_MARKS, "");
        cleanText += stripped.length === char.length ? stripped : char;
    }

    // collects the indices of every s that must stay round.
    const excludedIndices = new Set();
    if (exclusionPattern !== null) {
        for (const index of findMatchIndices(exclusionPattern, cleanText)) {
            excludedIndices.add(index);
        }
        for (const index of findMatchIndices(exclusionPattern, text)) {
            excludedIndices.add(index);
        }
    }

    // swaps in the long s at every selected index that is not excluded.
    const chars = text.split("");
    for (const index of findMatchIndices(mainPattern, cleanText)) {
        if (!excludedIndices.has(index)) {
            chars[index] = "ſ";
        }
    }
    const converted = chars.join("");

    if (ssiUsesDoubleSS) {
        return converted;
    }
    return converted.replace(/ſſi/g, "ſsi");
}
+
/**
 * converts an English word so that it uses the long s (ſ).
 *
 * an s becomes ſ when a letter follows it. the first s of an "ss" that
 * precedes f/F and any s directly after f/F are passed to romanceConversion
 * as exclusions. a run of three long s ("ſſſ") is rewritten as "ſsſ".
 *
 * @param {string} text - the word to convert.
 * @param {object} [options] - optional settings.
 * @param {boolean} [options.roundSBeforeBK=false] - if true, the s stays
 *     round before b and k (true in 17th and early 18th century).
 * @returns {Array} [converted text, whether any replacement was made,
 *     whether fancy in-place replacements are needed].
 */
function englishConversion(text, { roundSBeforeBK = false } = {}) {
    // the b/k variant allows every letter except b and k after the s.
    const mainPattern = roundSBeforeBK
        ? /s(?=[ac-jl-zAC-JL-Z])/g
        : /s(?=[a-zA-Z])/g;
    const exclusionPattern = /ss(?=f|F)|(?<=f|F)s/g;

    const withLongS = romanceConversion(text, mainPattern, exclusionPattern);
    const converted = withLongS.replace(/ſſſ/g, "ſsſ");

    return [
        converted, // converted text.
        converted !== text, // if any replacement is made.
        true, // if any fancy in-place replacements are needed
    ];
}
+
/**
 * converts a Spanish word so that it uses the long s (ſ).
 *
 * an s becomes ſ when followed by a hyphen, an em dash, or any letter other
 * than b, f and h. an s directly before an accented vowel is passed to
 * romanceConversion as an exclusion, and "ſſi" is written as "ſsi".
 *
 * @param {string} text - the word to convert.
 * @param {object} [options] - optional settings.
 * @param {boolean} [options.useLongSBeforeAccentedO=false] - if true, an s
 *     before an accented o (ó, ò) is not excluded.
 * @returns {Array} [converted text, whether any replacement was made,
 *     whether fancy in-place replacements are needed].
 */
function spanishConversion(text, { useLongSBeforeAccentedO = false } = {}) {
    const mainPattern = /s(?=[ac-egi-zAC-EGI-Z\-—])/g;

    // both variants list ù; only the default one also lists ó and ò.
    const exclusionPattern = useLongSBeforeAccentedO
        ? /s(?=[áàéèíìúùüÁÀÉÈÍÌÚÙÜ])/g
        : /s(?=[áàéèíìóòúùüÁÀÉÈÍÌÓÒÚÙÜ])/g;

    const converted = romanceConversion(
        text, mainPattern, exclusionPattern, false
    );

    return [
        converted, // converted text.
        converted !== text, // if any replacement is made.
        true, // if any fancy in-place replacements are needed
    ];
}
+
/**
 * converts a French word so that it uses the long s (ſ).
 *
 * an s becomes ſ when followed by any letter other than b, f and h.
 * no exclusion pattern is used.
 *
 * @param {string} text - the word to convert.
 * @returns {Array} [converted text, whether any replacement was made,
 *     whether fancy in-place replacements are needed].
 */
function frenchConversion(text) {
    const mainPattern = /s(?=[ac-egi-zAC-EGI-Z])/g;
    const converted = romanceConversion(text, mainPattern, null);

    return [
        converted, // converted text.
        converted !== text, // if any replacement is made.
        true, // if any fancy in-place replacements are needed
    ];
}
+
/**
 * converts an Italian word so that it uses the long s (ſ).
 *
 * an s becomes ſ when followed by a hyphen, an em dash, or any letter other
 * than b and f. an s directly before an accented vowel is passed to
 * romanceConversion as an exclusion.
 *
 * @param {string} text - the word to convert.
 * @param {object} [options] - optional settings.
 * @param {boolean} [options.useDoubleLongWithSsi=true] - if false, "ſſi" is
 *     written as "ſsi".
 * @returns {Array} [converted text, whether any replacement was made,
 *     whether fancy in-place replacements are needed].
 */
function italianConversion(text, { useDoubleLongWithSsi = true } = {}) {
    const mainPattern = /s(?=[ac-eg-zAC-EG-Z\-—])/g;
    // lists ù as well, so both cases of every accented vowel are covered.
    const exclusionPattern = /s(?=[áàéèíìóòúùüÁÀÉÈÍÌÓÒÚÙÜ])/g;

    const converted = romanceConversion(
        text, mainPattern, exclusionPattern, useDoubleLongWithSsi
    );

    return [
        converted, // converted text.
        converted !== text, // if any replacement is made.
        true, // if any fancy in-place replacements are needed
    ];
}
diff --git a/js-long-s/splitstring.js b/js-long-s/splitstring.js
@@ -0,0 +1,56 @@
/**
 * returns true if the given string begins with a letter.
 *
 * any Unicode letter counts, so letters outside Latin-1 (e.g. œ, Œ, Ÿ) are
 * accepted, and the Latin-1 signs × (U+00D7) and ÷ (U+00F7) are not.
 * only the first code point is examined; an empty string gives false.
 *
 * @param {string} char - the character to test.
 * @returns {boolean} true if the first code point is a letter.
 */
function isLetter(char) {
    return /^\p{L}/u.test(char);
}
+
/**
 * splits a string on whitespace and returns each word with its position.
 *
 * every whitespace-separated token is trimmed so that it starts at its
 * first letter and ends at its last letter. when lang is "de", a trailing
 * apostrophe (') may also end the word. tokens that contain no letter at
 * all are skipped.
 *
 * @param {string} inputString - the text to split.
 * @param {string} lang - the language code; only "de" changes the behavior.
 * @returns {Array} a list of [index, word] pairs, where index is the
 *     position of the word's first letter in inputString.
 */
function splitStringWithIndices(inputString, lang) {
    const APOSTROPHES = "'";
    const results = [];
    const regex = /\S+/g; // matches non-whitespace sequences.

    // returns true if the character may be the last one of a word.
    const canEndWord = (char) =>
        isLetter(char) || (lang === "de" && char === APOSTROPHES);

    let match;
    while ((match = regex.exec(inputString)) !== null) {
        const token = match[0];

        // finds the first letter of the token.
        let localStartIndex = 0;
        while (
            localStartIndex < token.length
            && !isLetter(token[localStartIndex])
        ) {
            localStartIndex++;
        }

        if (localStartIndex === token.length) continue; // no letters found.

        // finds the last character that may end the word.
        // the letter at localStartIndex guarantees that this loop stops.
        let localEndIndex = token.length - 1;
        while (!canEndWord(token[localEndIndex])) {
            localEndIndex--;
        }

        results.push([
            match.index + localStartIndex,
            token.substring(localStartIndex, localEndIndex + 1),
        ]);
    }

    return results;
}
diff --git a/script.js b/script.js
@@ -1,26 +1,55 @@
 function convertText() {
-    // Get the text from the textarea
+    const keepUnknownS = false;
+
     const inputText = document.getElementById("inputText").value;
-
-    // Get the selected option from the dropdown
     const langOption = document.getElementById("langOption").value;
 
-    // Variable to hold the output
-    let outputText = "";
-
-    // Check what option was selected and perform the appropriate conversion
+    let outputText = inputText;
+    let convertFunc = null;
+
     if (langOption === "en") {
-        outputText = "EN";
+        convertFunc = englishConversion;
     } else if (langOption === "es") {
-        outputText = "ES";
+        convertFunc = spanishConversion;
+        //const result = spanishConversion(inputText);
+        //outputText = result[0]; // modified text.
     } else if (langOption === "fr") {
-        outputText = "FR";
+        convertFunc = frenchConversion;
+        // const result = frenchConversion(inputText);
+        // outputText = result[0]; // modified text.
     } else if (langOption === "it") {
-        outputText = "IT";
+        convertFunc = italianConversion;
+        // const result = italianConversion(inputText);
+        // outputText = result[0]; // modified text.
     } else {
         outputText = "DE";
     }
-
-    // Display the output text
+
+    if (convertFunc !== null) {
+        let results = splitStringWithIndices(inputText, langOption);
+        for (let [i, oldWord] of results) {
+            let [newWord, replacementMade, useFancyReplace] = convertFunc(oldWord);
+
+            if (!replacementMade) {
+                continue;
+            }
+
+            if (!useFancyReplace) {
+                outputText = outputText.substring(0, i) + newWord + outputText.substring(i + newWord.length);
+            } else {
+                if (!keepUnknownS) {
+                    newWord = newWord.replace(/X/g, "ſ"); // replaces UNKNOWN_S with long s
+                }
+
+                for (let j = 0; j < newWord.length; j++) {
+                    if (outputText[i + j] === "s") {
+                        outputText = outputText.substring(0, i + j) + newWord[j] + outputText.substring(i + j + 1);
+                    }
+                }
+            }
+        }
+    }
+
+
     document.getElementById("outputText").innerText = outputText;
-}
+}