// wink-lemmatizer
// English lemmatizer
//
// This file is part of “wink-lemmatizer”.
//
// Copyright (c) GRAYPE Systems Private Limited
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
// Load adjective/noun/verb exceptions.
const adjectiveExceptions = require( 'wink-lexicon/src/wn-adjective-exceptions.js' );
const nounExceptions = require( 'wink-lexicon/src/wn-noun-exceptions.js' );
const verbExceptions = require( 'wink-lexicon/src/wn-verb-exceptions.js' );
// Load all words (base form),
const words = require( 'wink-lexicon/src/wn-words.js' );
// and their senses.
const senseMap = require( 'wink-lexicon/src/wn-word-senses.js' );
// The name space.
const lemmatize = Object.create( null );
// The following code is an adaptation of [WordNet's Morphy](https://wordnet.princeton.edu/documentation/morphy7wn):
// ### isAdjective
/**
* Checks the word in base form is an adjective or not using wordnet senses.
*
* @private
* @method isAdjective
* @param {string} word that needs to be tested for adjective.
* @return {boolean} `true` if word is a valid adjective otherwise `false.`
* @example
* isAdjective( 'lat' );
* // -> false
*/
var isAdjective = function ( word ) {
const index = words[ word ];
if ( index === undefined ) return false;
const senses = senseMap[ index ];
for ( let k = 0; k < senses.length; k += 1 ) {
if ( senses[ k ] < 2 ) return true;
}
return false;
}; // isAdjective()
// ### lemmatizeAdjective
/**
*
* Conjugates an `adjective` to it's base form (VB). It also has an alias
* `lemmatizeAdjective` to maintain API level compatibility with previous version.
*
* @method adjective
* @param {string} adjective that needs to be conjugated to base form.
* @return {string} the base form of `adjective`.
* @example
* lemmatize.adjective( 'farthest' );
* // -> far
*/
lemmatize.adjective = function ( adjective ) {
var lemma = adjectiveExceptions[ adjective ];
if ( lemma ) return lemma;
lemma = adjective.replace( /est$|er$/, '' );
if ( lemma.length === adjective.length ) return adjective;
if ( isAdjective( lemma ) ) return lemma;
lemma += 'e';
if ( isAdjective( lemma ) ) return lemma;
return adjective;
}; // adjective()
// ### isVerb
/**
* Checks the word in base form is a verb or not using wordnet senses.
*
* @private
* @method isVerb
* @param {string} word that needs to be tested for verb.
* @return {boolean} `true` if word is a valid verb otherwise `false.`
* @example
* isVerb( 'eat' );
* // -> true
*/
var isVerb = function ( word ) {
const index = words[ word ];
if ( index === undefined ) return false;
const senses = senseMap[ index ];
for ( let k = 0; k < senses.length; k += 1 ) {
if ( senses[ k ] > 28 && senses[ k ] < 44 ) return true;
}
return false;
}; // isVerb()
// ### lemmatizeVerb
/**
*
* Conjugates a `verb` to it's base form (VB). It also has an alias
* `lemmatizeVerb` to maintain API level compatibility with previous version.
*
* @method verb
* @param {string} verb that needs to be conjugated to base form.
* @return {string} the base form of `verb`.
* @example
* lemmatize.verb( 'winning' );
* // -> win
*/
lemmatize.verb = function ( verb ) {
var lemma = verbExceptions[ verb ];
if ( lemma ) return lemma;
lemma = verb.replace( /s$/, '' );
if ( lemma.length !== verb.length && isVerb( lemma ) ) return lemma;
lemma = verb.replace( /ies$/, 'y' );
if ( lemma.length !== verb.length && isVerb( lemma ) ) return lemma;
lemma = verb.replace( /es$|ed$|ing$/, '' );
if ( lemma.length !== verb.length ) {
if ( isVerb( lemma ) ) return lemma;
lemma += 'e';
if ( isVerb( lemma ) ) return lemma;
}
return verb;
}; // verb()
const nounRegexes = [
{ replace: /s$/, by: '' },
{ replace: /ses$/, by: 's' },
{ replace: /xes$/, by: 'x' },
{ replace: /zes$/, by: 's' },
{ replace: /ves$/, by: 'f' },
{ replace: /ches$/, by: 'ch' },
{ replace: /shes$/, by: 'sh' },
{ replace: /men$/, by: 'man' },
{ replace: /ies$/, by: 'y' }
];
// ### isNoun
/**
* Checks the word in base form is a noun or not using wordnet senses.
*
* @private
* @method isNoun
* @param {string} word that needs to be tested for noun.
* @return {boolean} `true` if word is a valid noun otherwise `false.`
* @example
* isAdjective( 'house' );
* // -> true
*/
var isNoun = function ( word ) {
const index = words[ word ];
if ( index === undefined ) return false;
const senses = senseMap[ index ];
for ( let k = 0; k < senses.length; k += 1 ) {
if ( senses[ k ] > 2 && senses[ k ] < 29 ) return true;
}
return false;
}; // isNoun()
// ### lemmatizeNoun
/**
*
* Converts the input `noun` to it's singular form. It also has an alias
* `lemmatizeNoun` to maintain API level compatibility with previous version.
*
* @method noun
* @param {string} noun that needs to be lemmatized.
* @return {string} the singular of `noun`.
* @example
* lemmatize.noun( 'handkerchieves' );
* // -> handkerchief
*/
lemmatize.noun = function ( noun ) {
var lemma = nounExceptions[ noun ];
if ( lemma ) return lemma;
lemma = noun;
for ( let k = 0; k < nounRegexes.length; k += 1 ) {
lemma = noun.replace( nounRegexes[ k ].replace, nounRegexes[ k ].by );
if ( lemma.length !== noun.length && isNoun( lemma ) ) return lemma;
}
return noun;
}; // noun()
// Create alias to maintain backwards compatibility.
lemmatize.lemmatizeNoun = lemmatize.noun;
lemmatize.lemmatizeVerb = lemmatize.verb;
lemmatize.lemmatizeAdjective = lemmatize.adjective;
module.exports = lemmatize;