From c819808056fcbfa31f6b4b5d8458b62ef1166660 Mon Sep 17 00:00:00 2001 From: Travis G_____ <162270915+travisgk@users.noreply.github.com> Date: Fri, 20 Sep 2024 11:28:18 -0500 Subject: [PATCH] Add files via upload --- index.html | 12 ++-- js-long-s/romance.js | 120 +++++++++++++++++++++++++++++++++++++++ js-long-s/splitstring.js | 56 ++++++++++++++++++ script.js | 57 ++++++++++++++----- 4 files changed, 226 insertions(+), 19 deletions(-) create mode 100644 js-long-s/romance.js create mode 100644 js-long-s/splitstring.js diff --git a/index.html b/index.html index 62a9173..ee289ae 100644 --- a/index.html +++ b/index.html @@ -7,9 +7,14 @@

Long S Converter

-

This tool converts text to use the archaic long S letter ( ſ ) in its spellings.

+ + + + + +

This tool converts text to use the archaic long S letter (ſ) in its spellings.

-

English, Spanish, French, Italian, and German are supported.

+

English, Spanish, French, and Italian are supported.


@@ -24,7 +29,6 @@

Long S Converter

- @@ -33,7 +37,5 @@

Long S Converter

// ===========================================================================
// js-long-s/romance.js
// by TravisGK
// ---
// This file contains the functions necessary to convert words in Romance
// languages so that they use the archaic letter called the long S (ſ).
// Although English is a Germanic language, its rules are regular enough to be
// handled by the code already in this file, so its conversion function lives
// here as well.
// ===========================================================================

/**
 * Removes combining diacritical marks (U+0300–U+036F) from a string after
 * decomposing it with NFD.
 *
 * NOTE: the result may have a different length than the input (for example
 * when the input already contains standalone combining marks).
 *
 * @param {string} str - text to strip.
 * @returns {string} the text without combining marks.
 */
function removeAccents(str) {
    return str.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
}

/**
 * Strips accents like removeAccents(), but guarantees that the result has
 * exactly the same length (in UTF-16 code units) as the input, so an index
 * found in the stripped text addresses the same character in the original.
 *
 * Each non-ASCII character is replaced by its unaccented base only when that
 * base is a single code unit; anything else (standalone combining marks,
 * characters without a one-unit base) is kept untouched.
 *
 * @param {string} str - text to strip.
 * @returns {string} same-length text with precomposed accents removed.
 */
function stripAccentsKeepLength(str) {
    return str.replace(/[^\u0000-\u007f]/gu, (ch) => {
        const base = removeAccents(ch);
        return base.length === 1 ? base : ch;
    });
}

/**
 * Returns the start index of every match of `pattern` in `str`.
 *
 * A fresh global copy of the pattern is used so the caller's regex is never
 * mutated (`lastIndex`) and a pattern lacking the `g` flag cannot cause an
 * endless loop.
 *
 * @param {RegExp} pattern - pattern to search for.
 * @param {string} str - text to search in.
 * @returns {number[]} match start indices in ascending order.
 */
function findMatchIndices(pattern, str) {
    const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
    const globalPattern = new RegExp(pattern.source, flags);
    return Array.from(str.matchAll(globalPattern), (m) => m.index);
}

/**
 * Returns `text` with the long s (ſ) placed at every position matched by
 * `mainPattern`, except the positions matched by `exclusionPattern`.
 *
 * `mainPattern` is evaluated against an accent-stripped copy of the text so
 * that plain letter classes such as [a-z] also cover accented letters.
 * `exclusionPattern` is evaluated against the ORIGINAL text, because the
 * exclusion patterns used in this file look for accented vowels, which do not
 * exist in the stripped copy. Accented characters in the exclusion pattern
 * are expected to appear precomposed (NFC) in the text.
 *
 * @param {string} text - the word to convert.
 * @param {RegExp} mainPattern - matches each "s" that becomes a long s.
 * @param {?RegExp} [exclusionPattern=null] - matches positions that must be
 *     left untouched; a match only counts if it starts at an index that
 *     `mainPattern` also matched.
 * @param {boolean} [ssiUsesDoubleSS=true] - when false, every "ſſi" in the
 *     result is rewritten as "ſsi".
 * @returns {string} the converted text (same length as the input).
 */
function romanceConversion(
    text,
    mainPattern,
    exclusionPattern = null,
    ssiUsesDoubleSS = true,
) {
    const cleanText = stripAccentsKeepLength(text);

    // finds the indices of the letter S that follow the given pattern.
    let indices = findMatchIndices(mainPattern, cleanText);

    // filters out excluded indices.
    if (exclusionPattern !== null) {
        const excluded = new Set(findMatchIndices(exclusionPattern, text));
        indices = indices.filter((i) => !excluded.has(i));
    }

    // "ſ" is a single UTF-16 code unit, so a per-code-unit overwrite is safe.
    const chars = text.split("");
    for (const i of indices) {
        chars[i] = "ſ";
    }
    const converted = chars.join("");

    return ssiUsesDoubleSS ? converted : converted.replace(/ſſi/g, "ſsi");
}

/**
 * Converts an English word to long-s spelling.
 *
 * @param {string} text - the word to convert.
 * @returns {Array} `[convertedText, replacementMade, useFancyReplace]`;
 *     both flags are always `true` for this converter.
 */
function englishConversion(text) {
    const ROUND_S_BEFORE_BK = false; // true in 17th and early 18th century.

    // with ROUND_S_BEFORE_BK only "b" and "k" are removed from the lookahead
    // (the previous class also dropped "l" by mistake).
    const mainPattern = ROUND_S_BEFORE_BK
        ? /s(?=[ac-jl-zAC-JL-Z])/g
        : /s(?=[a-zA-Z])/g;
    const exclusionPattern = /ss(?=f|F)|(?<=f|F)s/g;

    let converted = romanceConversion(text, mainPattern, exclusionPattern);

    // three long s in a row are broken up with a round s in the middle.
    converted = converted.replace(/ſſſ/g, "ſsſ");

    return [
        converted, // converted text.
        true, // if any replacement is made.
        true, // if any fancy in-place replacements are needed.
    ];
}

/**
 * Converts a Spanish word to long-s spelling. The round s is kept before
 * accented vowels, and "ſſi" is written as "ſsi".
 *
 * @param {string} text - the word to convert.
 * @returns {Array} `[convertedText, replacementMade, useFancyReplace]`;
 *     both flags are always `true` for this converter.
 */
function spanishConversion(text) {
    const USE_LONG_S_BEFORE_ACCENTED_O = false;

    const mainPattern = /s(?=[a-ac-eg-gi-zA-AC-EG-GI-Z-—])/g;
    const exclusionPattern = USE_LONG_S_BEFORE_ACCENTED_O
        ? /s(?=[áàéèíìúùüÁÀÉÈÍÌÚÙÜ])/g
        : /s(?=[áàéèíìóòúùüÁÀÉÈÍÌÓÒÚÙÜ])/g;

    const converted = romanceConversion(text, mainPattern, exclusionPattern, false);
    return [
        converted, // converted text.
        true, // if any replacement is made.
        true, // if any fancy in-place replacements are needed.
    ];
}

/**
 * Converts a French word to long-s spelling.
 *
 * @param {string} text - the word to convert.
 * @returns {Array} `[convertedText, replacementMade, useFancyReplace]`;
 *     both flags are always `true` for this converter.
 */
function frenchConversion(text) {
    const mainPattern = /s(?=[a-ac-eg-gi-zA-AC-EG-GI-Z])/g;
    const converted = romanceConversion(text, mainPattern, null);

    return [
        converted, // converted text.
        true, // if any replacement is made.
        true, // if any fancy in-place replacements are needed.
    ];
}

/**
 * Converts an Italian word to long-s spelling. The round s is kept before
 * accented vowels.
 *
 * @param {string} text - the word to convert.
 * @returns {Array} `[convertedText, replacementMade, useFancyReplace]`;
 *     both flags are always `true` for this converter.
 */
function italianConversion(text) {
    const USE_DOUBLE_LONG_WITH_SSI = true;

    const mainPattern = /s(?=[a-ac-eg-zA-AC-EG-Z-—])/g;
    const exclusionPattern = /s(?=[áàéèíìóòúùüÁÀÉÈÍÌÓÒÚÙÜ])/g;
    const converted = romanceConversion(
        text, mainPattern, exclusionPattern, USE_DOUBLE_LONG_WITH_SSI
    );

    return [
        converted, // converted text.
        true, // if any replacement is made.
        true, // if any fancy in-place replacements are needed.
    ];
}

// ===========================================================================
// js-long-s/splitstring.js
// ===========================================================================

/**
 * Returns true if the given character is a letter.
 *
 * Uses the Unicode "Letter" property, so accented Latin letters, ligatures
 * such as "œ" and the long s itself count as letters, while symbols such as
 * "×" and "÷" do not.
 *
 * @param {string} char - a single character (only its first code point is
 *     inspected).
 * @returns {boolean} whether the character is a letter.
 */
function isLetter(char) {
    return /^\p{L}/u.test(char);
}

/**
 * Splits a string into words and reports where each word starts.
 *
 * A word is a run of non-whitespace characters with leading and trailing
 * non-letters trimmed off. Runs containing no letter at all are skipped.
 * For German (`lang === "de"`) a trailing apostrophe is kept as part of the
 * word.
 *
 * @param {string} inputString - the text to split.
 * @param {string} lang - language code; only "de" changes the behavior.
 * @returns {Array<[number, string]>} `[startIndex, word]` pairs, where
 *     `startIndex` is the offset of the word within `inputString`.
 */
function splitStringWithIndices(inputString, lang) {
    const APOSTROPHES = "'";
    const endsWord = lang === "de"
        ? (ch) => isLetter(ch) || APOSTROPHES.includes(ch)
        : isLetter;

    const results = [];
    for (const match of inputString.matchAll(/\S+/g)) {
        const token = match[0];

        // finds the first letter of the token.
        let start = 0;
        while (start < token.length && !isLetter(token[start])) {
            start++;
        }
        if (start === token.length) continue; // no letters found.

        // finds the last character that may end a word; the search cannot go
        // below `start`, which is known to be a letter.
        let end = token.length - 1;
        while (end > start && !endsWord(token[end])) {
            end--;
        }

        results.push([match.index + start, token.substring(start, end + 1)]);
    }

    return results;
}

// ===========================================================================
// script.js
// ===========================================================================

/**
 * Returns the conversion function for a language code, or null when this
 * file has no converter for it. The converters are looked up at call time so
 * script load order does not matter.
 *
 * @param {string} langOption - language code ("en", "es", "fr" or "it").
 * @returns {?Function} the converter, or null.
 */
function pickConverter(langOption) {
    switch (langOption) {
        case "en": return englishConversion;
        case "es": return spanishConversion;
        case "fr": return frenchConversion;
        case "it": return italianConversion;
        default: return null;
    }
}

/**
 * Applies the long-s conversion for `langOption` to every word of
 * `inputText`, leaving whitespace and punctuation exactly where they were.
 *
 * For a language without a converter (currently everything except "en",
 * "es", "fr" and "it") the placeholder string "DE" is returned.
 *
 * @param {string} inputText - the text to convert.
 * @param {string} langOption - language code.
 * @returns {string} the converted text.
 */
function applyLongS(inputText, langOption) {
    const keepUnknownS = false;

    const convertFunc = pickConverter(langOption);
    if (convertFunc === null) {
        return "DE";
    }

    const chars = inputText.split("");
    for (const [start, oldWord] of splitStringWithIndices(inputText, langOption)) {
        let [newWord, replacementMade, useFancyReplace] = convertFunc(oldWord);

        if (!replacementMade) {
            continue;
        }

        if (!useFancyReplace) {
            // plain overwrite of the word's characters.
            for (let j = 0; j < newWord.length; j++) {
                chars[start + j] = newWord[j];
            }
            continue;
        }

        if (!keepUnknownS) {
            newWord = newWord.replace(/X/g, "ſ"); // replaces UNKNOWN_S with long s
        }

        // only positions that currently hold a round "s" are touched, so
        // every other character of the original text is preserved.
        for (let j = 0; j < newWord.length; j++) {
            if (chars[start + j] === "s") {
                chars[start + j] = newWord[j];
            }
        }
    }

    return chars.join("");
}

/**
 * Reads the text from the element with id "inputText" and the language from
 * the element with id "langOption", converts the text and writes the result
 * into the element with id "outputText".
 */
function convertText() {
    const inputText = document.getElementById("inputText").value;
    const langOption = document.getElementById("langOption").value;

    document.getElementById("outputText").innerText = applyLongS(inputText, langOption);
}