import {
DEFAULT_OPTIONS,
UPPERCASE_START,
UPPERCASE_END,
FOUR_CHAR_EDGECASES,
FROM_ROMAJI,
} from './constants';
import isCharInRange from './utils/isCharInRange';
import isCharUpperCase from './utils/isCharUpperCase';
import getChunkSize from './utils/getChunkSize';
import getChunk from './utils/getChunk';
import isCharConsonant from './utils/isCharConsonant';
import isCharVowel from './utils/isCharVowel';
import hiraganaToKatakana from './utils/hiraganaToKatakana';
import isKana from './isKana';
/**
* Convert [Romaji](https://en.wikipedia.org/wiki/Romaji) to [Kana](https://en.wikipedia.org/wiki/Kana), lowercase text will result in [Hiragana](https://en.wikipedia.org/wiki/Hiragana) and uppercase text will result in [Katakana](https://en.wikipedia.org/wiki/Katakana).
* @param {String} [input=''] text
* @param {DefaultOptions} [options=defaultOptions]
* @return {String} converted text
* @example
* toKana('onaji BUTTSUUJI')
* // => 'おなじ ブッツウジ'
* toKana('ONAJI buttsuuji')
* // => 'オナジ ぶっつうじ'
* toKana('座禅‘zazen’スタイル')
* // => '座禅「ざぜん」スタイル'
* toKana('batsuge-mu')
* // => 'ばつげーむ'
* toKana('!?.:/,~-‘’“”[](){}') // Punctuation conversion
* // => '!?。:・、〜ー「」『』[](){}'
* toKana('we', { useObsoleteKana: true })
* // => 'ゑ'
*/
function toKana(input = '', options = {}, ignoreCase = false) {
const config = Object.assign({}, DEFAULT_OPTIONS, options);
// Final output array
const kana = [];
// Position in the string that is being evaluated
let cursor = 0;
const len = input.length;
const maxChunk = 3;
let chunkSize = 3;
let chunk = '';
let chunkLC = '';
// Steps through the string pulling out chunks of characters. Each chunk will be evaluated
// against the romaji to kana table. If there is no match, the last character in the chunk
// is dropped and the chunk is reevaluated. If nothing matches, the character is assumed
// to be invalid or punctuation or other and gets passed through.
while (cursor < len) {
let kanaChar = null;
chunkSize = getChunkSize(maxChunk, len - cursor);
while (chunkSize > 0) {
chunk = getChunk(input, cursor, cursor + chunkSize);
chunkLC = chunk.toLowerCase();
// Handle super-rare edge cases with 4 char chunks (like ltsu, chya, shya)
if (FOUR_CHAR_EDGECASES.includes(chunkLC) && (len - cursor) >= 4) {
chunkSize += 1;
chunk = getChunk(input, cursor, cursor + chunkSize);
chunkLC = chunk.toLowerCase();
} else {
// Handle edge case of n followed by consonant
if (chunkLC.charAt(0) === 'n') {
if (chunkSize === 2) {
// Handle edge case of n followed by a space (only if not in IME mode)
if (!config.IMEMode && chunkLC.charAt(1) === ' ') {
kanaChar = 'ん ';
break;
}
// Convert IME input of n' to "ん"
if (config.IMEMode && chunkLC === "n'") {
kanaChar = 'ん';
break;
}
}
// Handle edge case of n followed by n and vowel
if (isCharConsonant(chunkLC.charAt(1), false) && isCharVowel(chunkLC.charAt(2))) {
chunkSize = 1;
chunk = getChunk(input, cursor, cursor + chunkSize);
chunkLC = chunk.toLowerCase();
}
}
// Handle case of double consonants
if (chunkLC.charAt(0) !== 'n' &&
isCharConsonant(chunkLC.charAt(0)) &&
chunk.charAt(0) === chunk.charAt(1)
) {
chunkSize = 1;
// Return katakana ッ if chunk is uppercase, otherwise return hiragana っ
if (isCharInRange(chunk.charAt(0), UPPERCASE_START, UPPERCASE_END)) {
chunkLC = 'ッ';
chunk = 'ッ';
} else {
chunkLC = 'っ';
chunk = 'っ';
}
}
}
kanaChar = FROM_ROMAJI[chunkLC];
// console.log(`${cursor}x${chunkSize}:${chunk} => ${kanaChar}`); // DEBUG
if (kanaChar != null) {
break;
}
// Step down the chunk size.
// If chunkSize was 4, step down twice.
if (chunkSize === 4) {
chunkSize -= 2;
} else {
chunkSize -= 1;
}
}
// Passthrough undefined values
if (kanaChar == null) {
kanaChar = chunk;
}
// Handle special cases.
if (config.useObsoleteKana) {
if (chunkLC === 'wi') kanaChar = 'ゐ';
if (chunkLC === 'we') kanaChar = 'ゑ';
}
if (!!config.IMEMode && chunkLC.charAt(0) === 'n') {
if ((input.charAt(cursor + 1).toLowerCase() === 'y' &&
isCharVowel(input.charAt(cursor + 2)) === false) ||
cursor === (len - 1) ||
isKana(input.charAt(cursor + 1))
) {
// Don't transliterate this yet.
kanaChar = chunk.charAt(0);
}
}
// Use katakana if first letter in chunk is uppercase
if (!ignoreCase) {
if (isCharUpperCase(chunk.charAt(0))) {
kanaChar = hiraganaToKatakana(kanaChar);
}
}
kana.push(kanaChar);
cursor += chunkSize || 1;
}
return kana.join('');
}
export default toKana;