// wink-nlp-utils
// NLP Functions for amplifying negations, managing elisions,
// creating ngrams, stems, phonetic codes to tokens and more.
//
// Copyright (C) GRAYPE Systems Private Limited
//
// This file is part of “wink-nlp-utils”.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
var phnrgx = require( './phonetize_regexes.js' );
/* eslint no-underscore-dangle: "off" */
// ## string
// ### phonetize
/**
 *
 * Phonetizes the input string using an algorithmic adaptation of Metaphone; it
 * is not an exact implementation of Metaphone.
 *
 * The word is lower-cased and then passed through an ordered series of regex
 * replacements. The order matters, because later steps operate on the output
 * of earlier ones (for example, `x` is expanded before `sh` is folded into `x`).
 * The patterns themselves are defined in `./phonetize_regexes.js`; the step
 * comments below describe their intent as indicated by their names.
 *
 * @alias string#phonetize
 * @param {string} word the input word.
 * @return {string} phonetic code of `word`.
 * @example
 * phonetize( 'perspective' );
 * // -> 'prspktv'
 * phonetize( 'phenomenon' );
 * // -> 'fnmnn'
 */
var phonetize = function ( word ) {
  var p = word.toLowerCase();
  // Remove repeating letters.
  p = p.replace( phnrgx.repeatingChars, '$1' );
  // Drop the first character if the word matches `kngnPairs`.
  if ( phnrgx.kngnPairs.test( p ) ) {
    // `slice( 1 )` keeps everything after the first character; it replaces the
    // deprecated `substr( 1, p.length - 1 )` and yields the same result.
    p = p.slice( 1 );
  }
  // Run Regex Express now!
  p = p
        // Change `ough` in the end as `f`.
        .replace( phnrgx.ough, 'f' )
        // Change `dg` to `j`, in `dge, dgi, dgy`.
        .replace( phnrgx.dge, 'je' )
        .replace( phnrgx.dgi, 'ji' )
        .replace( phnrgx.dgy, 'jy' )
        // Change `c` to `k` in `sch`.
        .replace( phnrgx.sch, 'sk' )
        // Drop `c` in `sci, sce, scy`.
        .replace( phnrgx.sci, 'si' )
        .replace( phnrgx.sce, 'se' )
        .replace( phnrgx.scy, 'sy' )
        // Drop `t` if it appears as `tch`.
        .replace( phnrgx.tch, 'ch' )
        // Replace `tio & tia` by `sh`.
        .replace( phnrgx.tio, 'sh' )
        .replace( phnrgx.tia, 'sh' )
        // Drop `b` if it appears as `mb` in the end.
        .replace( phnrgx.mb_, 'm' )
        // NOTE: a step that drops `r` when it is preceded by a vowel and not
        // followed by a vowel or `y` (`vrnotvy` -> '$1$3') is currently disabled.
        // Replace `c` by `s` in `ce, ci, cy`.
        .replace( phnrgx.ce, 'se' )
        .replace( phnrgx.ci, 'si' )
        .replace( phnrgx.cy, 'sy' )
        // Replace `cq` by `q`.
        .replace( phnrgx.cq, 'q' )
        // Replace `ck` by `k`.
        .replace( phnrgx.ck, 'k' )
        // Replace `ph` by `f`.
        .replace( phnrgx.ph, 'f' )
        // Replace `th` by `0` (theta look alike!).
        .replace( phnrgx.th, '0' )
        // Replace `c` by `k` if it is not followed by `h`; the character that
        // follows (`$2`) is kept.
        .replace( phnrgx.cnoth, 'k$2' )
        // Replace `q` by `k`.
        .replace( phnrgx.q, 'k' )
        // Replace `x` by `s` if it appears in the beginning.
        .replace( phnrgx._x, 's' )
        // Otherwise replace `x` by `ks`.
        .replace( phnrgx.x, 'ks' )
        // Replace `sh, sia, sio` by `x`. Needs to be done post `x` processing!
        .replace( phnrgx.sh, 'x' )
        // Drop `y` if it is not followed by a **vowel** (the following character,
        // `$2`, is kept), then drop a `y` matched by `y_` (presumably a trailing
        // `y` -- confirm against the regex definition).
        .replace( phnrgx.ynotv, '$2' )
        .replace( phnrgx.y_, '' )
        // Replace `z` by `s`.
        .replace( phnrgx.z, 's' )
        // Drop the **vowels** matched by `__vowels`. As the examples above show,
        // vowels inside the word are removed (presumably a leading vowel is
        // retained -- confirm against the regex definition).
        .replace( phnrgx.__vowels, '' );
  return ( p );
}; // phonetize()
// Expose `phonetize` as this module's sole export.
module.exports = phonetize;