-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
226 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
/* | ||
js-long-s/romance.js | ||
by TravisGK | ||
--- | ||
this file contains the functions necessary to convert words in Romance languages | ||
so that they use the archaic letter called the long S (ſ). | ||
although English is a Germanic language, its rules are regular enough to be | ||
handled by code already in this file, so its conversion function is here. | ||
*/ | ||
|
||
// returns a copy of the given string without its accents.
// the string is decomposed (NFD) so that each accent becomes a separate
// combining mark, and every mark in U+0300..U+036F is then dropped.
function removeAccents(str) {
  const COMBINING_MARKS = /[\u0300-\u036f]/g;
  const decomposed = str.normalize("NFD");
  return decomposed.replace(COMBINING_MARKS, "");
}
|
||
// returns the set of indices at which the given pattern matches in haystack.
function collectMatchIndices(pattern, haystack) {
  const found = new Set();
  pattern.lastIndex = 0; // a reused /g regex may still carry an old position.
  let match;
  while ((match = pattern.exec(haystack)) !== null) {
    found.add(match.index);
    if (!pattern.global) break; // exec() never advances without the g flag.
    if (match[0] === "") pattern.lastIndex += 1; // steps past empty matches.
  }
  return found;
}

// strips accents one character at a time so that every character of the
// result sits at the same index as in the given text. a character is only
// swapped for its accent-free form when both have the same length; anything
// else (e.g. a combining mark that is already on its own) is kept as-is.
function stripAccentsInPlace(text) {
  let stripped = "";
  for (const char of text) {
    const base = char.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
    stripped += base.length === char.length ? base : char;
  }
  return stripped;
}

// this func returns text that uses the long s.
//
// text:             the word (or text) to convert.
// mainPattern:      regex whose match indices mark the letters to replace
//                   with "ſ". it is run on an accent-free copy of the text,
//                   so plain letter classes also cover accented letters.
// exclusionPattern: optional regex whose match indices are left untouched.
//                   it is run on both the original text and the accent-free
//                   copy, so it may name accented letters as well as plain ones.
// ssiUsesDoubleSS:  when false, every "ſſi" in the result becomes "ſsi".
function romanceConversion(
  text,
  mainPattern,
  exclusionPattern=null,
  ssiUsesDoubleSS=true,
) {
  const cleanText = stripAccentsInPlace(text);

  // finds the indices of the letter S that follow the given pattern.
  const longSIndices = collectMatchIndices(mainPattern, cleanText);

  // filters out excluded indices.
  if (exclusionPattern !== null) {
    for (const haystack of [text, cleanText]) {
      for (const i of collectMatchIndices(exclusionPattern, haystack)) {
        longSIndices.delete(i);
      }
    }
  }

  // cleanText and text have equal lengths, so the indices apply directly.
  const chars = text.split("");
  for (const i of longSIndices) {
    chars[i] = "ſ";
  }
  const converted = chars.join("");

  if (ssiUsesDoubleSS) {
    return converted;
  }
  return converted.replace(/ſſi/g, "ſsi");
}
|
||
// converts an English word so that it uses the long s.
// returns [converted text, whether anything changed, whether the caller
// needs to do a fancy in-place replacement].
function englishConversion(text) {
  const ROUND_S_BEFORE_BK = false; // true in 17th and early 18th century.

  // an s becomes long when a letter follows it. with ROUND_S_BEFORE_BK the
  // letters b and k are left out of the class, so an s before them stays round.
  const mainPattern = ROUND_S_BEFORE_BK
    ? /s(?=[ac-jl-zAC-JL-Z])/g
    : /s(?=[a-zA-Z])/g;

  // keeps round: the first s of an "ss" that comes right before f,
  // and any s that comes right after f.
  const exclusionPattern = /ss(?=[fF])|(?<=[fF])s/g;

  const withLongS = romanceConversion(text, mainPattern, exclusionPattern);

  // three long s in a row get a round s in the middle.
  const converted = withLongS.replace(/ſſſ/g, "ſsſ");

  return [
    converted, // converted text.
    converted !== text, // if any replacement is made.
    true, // if any fancy in-place replacements are needed
  ];
}
|
||
// converts a Spanish word so that it uses the long s.
// returns [converted text, whether anything changed, whether the caller
// needs to do a fancy in-place replacement].
function spanishConversion(text) {
  const USE_LONG_S_BEFORE_ACCENTED_O = false;

  // an s becomes long before any letter except b, f and h,
  // and also before a hyphen or an em dash.
  const mainPattern = /s(?=[ac-egi-zAC-EGI-Z\-—])/g;

  // an s stays round before an accented vowel; the accented o is only
  // part of that list when USE_LONG_S_BEFORE_ACCENTED_O is off.
  const exclusionPattern = USE_LONG_S_BEFORE_ACCENTED_O
    ? /s(?=[áàéèíìúùüÁÀÉÈÍÌÚÙÜ])/g
    : /s(?=[áàéèíìóòúùüÁÀÉÈÍÌÓÒÚÙÜ])/g;

  // the last argument turns every "ſſi" into "ſsi".
  const converted = romanceConversion(text, mainPattern, exclusionPattern, false);

  return [
    converted, // converted text.
    converted !== text, // if any replacement is made.
    true, // if any fancy in-place replacements are needed
  ];
}
|
||
// converts a French word so that it uses the long s.
// returns [converted text, whether anything changed, whether the caller
// needs to do a fancy in-place replacement].
function frenchConversion(text) {
  // an s becomes long before any letter except b, f and h.
  const mainPattern = /s(?=[ac-egi-zAC-EGI-Z])/g;
  const exclusionPattern = null; // nothing is excluded for French.

  const converted = romanceConversion(text, mainPattern, exclusionPattern);

  return [
    converted, // converted text.
    converted !== text, // if any replacement is made.
    true, // if any fancy in-place replacements are needed
  ];
}
|
||
// converts an Italian word so that it uses the long s.
// returns [converted text, whether anything changed, whether the caller
// needs to do a fancy in-place replacement].
function italianConversion(text) {
  const USE_DOUBLE_LONG_WITH_SSI = true;

  // an s becomes long before any letter except b and f,
  // and also before a hyphen or an em dash.
  const mainPattern = /s(?=[ac-eg-zAC-EG-Z\-—])/g;

  // an s stays round before an accented vowel.
  const exclusionPattern = /s(?=[áàéèíìóòúùüÁÀÉÈÍÌÓÒÚÙÜ])/g;

  const converted = romanceConversion(
    text, mainPattern, exclusionPattern, USE_DOUBLE_LONG_WITH_SSI
  );

  return [
    converted, // converted text.
    converted !== text, // if any replacement is made.
    true, // if any fancy in-place replacements are needed
  ];
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
// returns true if the given character is a letter.
// only the first code point of the given string is looked at, and only
// A-Z, a-z and the Latin-1 accented letters (U+00C0..U+00FF) count.
function isLetter(char) {
  const codePoint = char.codePointAt(0);

  // the multiplication and division signs sit inside the Latin-1 accented
  // range but are not letters.
  if (codePoint === 0x00D7 || codePoint === 0x00F7) {
    return false;
  }

  return (codePoint >= 0x0041 && codePoint <= 0x005A) || // A-Z.
    (codePoint >= 0x0061 && codePoint <= 0x007A) || // a-z.
    (codePoint >= 0x00C0 && codePoint <= 0x00FF); // accented letters (Lat-1).
}
|
||
// splits the given string on whitespace and returns a list of
// [index, word] pairs, where word is the token with its leading and trailing
// non-letters trimmed off and index is where that word starts in inputString.
// tokens that contain no letter at all are skipped.
// when lang is "de", a trailing apostrophe is kept as part of the word.
function splitStringWithIndices(inputString, lang) {
  const APOSTROPHES = "'";
  const results = [];
  const tokenRegex = /\S+/g; // matches non-whitespace sequences.

  // decides which characters may be the last character of a word.
  const canEndWord = lang === "de"
    ? (char) => isLetter(char) || char === APOSTROPHES
    : (char) => isLetter(char);

  let match;
  while ((match = tokenRegex.exec(inputString)) !== null) {
    const token = match[0];

    // finds the first letter of the token.
    let start = 0;
    while (start < token.length && !isLetter(token[start])) {
      start++;
    }
    if (start === token.length) continue; // no letters found.

    // finds the last character that may end the word. the letter at
    // `start` always qualifies, so the search never goes below it.
    let end = token.length - 1;
    while (end > start && !canEndWord(token[end])) {
      end--;
    }

    results.push([match.index + start, token.substring(start, end + 1)]);
  }

  return results;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,55 @@ | ||
// reads the input text and the selected language from the page, converts
// each word with that language's conversion function and writes the result
// into the "outputText" element.
function convertText() {
  const keepUnknownS = false;

  // Get the text from the textarea
  const inputText = document.getElementById("inputText").value;

  // Get the selected option from the dropdown
  const langOption = document.getElementById("langOption").value;

  // the output starts as a copy of the input and is patched word by word.
  let outputText = inputText;
  let convertFunc = null;

  // Check what option was selected and pick the appropriate conversion
  if (langOption === "en") {
    convertFunc = englishConversion;
  } else if (langOption === "es") {
    convertFunc = spanishConversion;
  } else if (langOption === "fr") {
    convertFunc = frenchConversion;
  } else if (langOption === "it") {
    convertFunc = italianConversion;
  } else {
    // no conversion function is wired up for any other option here.
    outputText = "DE";
  }

  if (convertFunc !== null) {
    const words = splitStringWithIndices(inputText, langOption);

    // walks the words from last to first, so a replacement that changes a
    // word's length cannot shift the indices of words still to be handled.
    for (let k = words.length - 1; k >= 0; k--) {
      const [start, oldWord] = words[k];
      let [newWord, replacementMade, useFancyReplace] = convertFunc(oldWord);

      if (!replacementMade) {
        continue;
      }

      if (!useFancyReplace) {
        // swaps the whole old word for the new one.
        outputText = outputText.substring(0, start)
          + newWord
          + outputText.substring(start + oldWord.length);
        continue;
      }

      if (!keepUnknownS) {
        // replaces UNKNOWN_S with long s.
        // NOTE(review): "X" is presumably the UNKNOWN_S marker of a
        // converter that is not visible here; confirm.
        newWord = newWord.replace(/X/g, "ſ");
      }

      // only positions that hold a lowercase s in the output are overwritten,
      // which leaves every other character of the original text untouched.
      for (let j = 0; j < newWord.length; j++) {
        if (outputText[start + j] === "s") {
          outputText = outputText.substring(0, start + j)
            + newWord[j]
            + outputText.substring(start + j + 1);
        }
      }
    }
  }

  // Display the output text
  document.getElementById("outputText").innerText = outputText;
}