// wink-naive-bayes-text-classifier
// Configurable Naive Bayes Classifier for text
// with cross-validation support.
//
// Copyright (C) GRAYPE Systems Private Limited
//
// This file is part of “wink-naive-bayes-text-classifier”.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
var helpers = require( 'wink-helpers' );
// Because we want to logically group the variables.
/* eslint sort-vars: 0 */
// It is a **N**aive **B**ayes **C**lassifier for **text** classification.
// It exposes following methods:
// 1. `definePrepTasks` allows to define a pipeline of functions that will be
// used to prepare each input prior to *learning*, *prediction*, and *evaluation*.
// 2. `defineConfig` sets up the configuration for *mode* and *smoothing factor*.
// 3. `learn` from example *input* and *label* pair(s).
// 4. `consolidate` learnings prior to evaluation and/or prediction.
// 5. `predict` the best *label* for the given *input*.
// 6. `stats` of learnings.
// 7. `exportJSON` exports the learnings in JSON format.
// 8. `importJSON` imports the learnings from JSON that may have been saved on disk.
// 9. `evaluate` the learnings from known examples of *input* and corresponding
// *label* by internally building a confusion matrix.
// 10. `metrics` are primarily macro-averages of *precision*, *recall*,
// and *f-measure* computed from the confusion matrix built during the evaluation
// phase.
// 11. `reset` all the learnings except the preparatory tasks; useful during
// cross-validation.
/**
 *
 * Creates an instance of a {@link NaiveBayesTextClassifier}.
 *
 * @return {NaiveBayesTextClassifier} object containing set of API methods for tasks like configuration,
 * data ingestion, learning, and prediction etc.
 * @example
 * // Load wink Naive Bayes Text Classifier.
 * var naiveBayesTextClassifier = require( 'wink-naive-bayes-text-classifier' );
 * // Create your instance of classifier.
 * var myClassifier = naiveBayesTextClassifier();
 */
var naiveBayesTextClassifier = function () {
  // All dictionaries below are created without a prototype so that tokens or
  // labels such as `constructor` or `toString` can never collide with
  // properties inherited from `Object.prototype`.
  // Total samples encountered under each label during learning.
  var samples = Object.create( null );
  // Maintains label-wise count of each word encountered during learning.
  var count = Object.create( null );
  // Maintains count of words encountered under a label during learning.
  var words = Object.create( null );
  // The entire vocabulary.
  var voc = new Set();
  // Preparatory tasks that are executed on the `learn` & `predict` input.
  var pTasks = [];
  // And its count; it is 0 until `definePrepTasks()` is called, in which case
  // the input is used as-is (i.e. it must already be an array of tokens).
  var pTaskCount = 0;
  // All the labels seen till the point of consolidation.
  var labels;
  // And their count: meant to be used in for-loops.
  var labelCount;
  // The `defineConfig()` checks this before altering config.
  var learned = false;
  // The `predict()` function checks for this being true; set in `consolidate()`.
  var consolidated = false;
  // The `metrics()` checks this; set in `evaluate()`.
  var evaluated = false;
  // Confusion Matrix: indexed as `cm[ predicted ][ actual ]`.
  var cm = Object.create( null );
  // metrics: Precision, Recall, and F-Measure
  var precision = Object.create( null );
  var recall = Object.create( null );
  var fmeasure = Object.create( null );
  /**
   * @classdesc Naive Bayes Text Classifier class.
   * @class NaiveBayesTextClassifier
   * @hideconstructor
   */
  var methods = Object.create( null );
  // Define unknown prediction.
  var unknown = 'unknown';
  // Configuration - `considerOnlyPresence` flag and `smoothingFactor`.
  var config = Object.create( null );
  // Set their default values.
  config.considerOnlyPresence = false;
  // Default smoothingFactor is set to Laplace add+1 smoothing.
  config.smoothingFactor = 1;

  // ### Private functions

  // #### Prepare Input
  /**
   *
   * Prepares the `input` by passing it serially through the pipeline of tasks
   * defined in the variable `pTasks` via `definePrepTasks()`. If no tasks
   * have been defined, the `input` is returned untouched.
   *
   * @param {string|string[]} input usually a text.
   * @return {string[]} tokens, provided the last task returns tokens.
   * @private
   */
  var prepareInput = function ( input ) {
    var processedInput = input;
    for ( var i = 0; i < pTaskCount; i += 1 ) {
      processedInput = pTasks[ i ]( processedInput );
    }
    return ( processedInput );
  }; // prepareInput()

  // #### To Dictionary
  /**
   *
   * Shallow copies the own enumerable properties of `obj` into a fresh
   * prototype-less object. Used while importing JSON, because `JSON.parse()`
   * yields ordinary objects whose inherited properties (e.g. `constructor`)
   * would otherwise be mistaken for learned tokens or labels.
   *
   * @param {object} obj to be copied.
   * @return {object} prototype-less copy of `obj`.
   * @private
   */
  var toDictionary = function ( obj ) {
    var dict = Object.create( null );
    Object.keys( obj ).forEach( function ( key ) {
      dict[ key ] = obj[ key ];
    } );
    return dict;
  }; // toDictionary()

  // #### Normalize Config
  /**
   *
   * Validates `cfg` and returns a fresh, normalized configuration object
   * without touching the current configuration; the caller decides when to
   * apply it. A missing `smoothingFactor` defaults to **1** and a missing or
   * non-boolean `considerOnlyPresence` defaults to **false**.
   *
   * @param {object} cfg raw configuration.
   * @return {object} normalized configuration.
   * @throws Error if `smoothingFactor` is not a number between 0 and 1.
   * @private
   */
  var normalizeConfig = function ( cfg ) {
    var rawSF = cfg.smoothingFactor;
    // If smoothing factor is undefined set it to laplace add+1 smoothing.
    var sf = ( rawSF === undefined ) ? 1 : parseFloat( rawSF );
    // Throw error for a value beyond 0-1 or NaN; report what was supplied
    // (a NaN/Infinity number would be stringified as `null` by JSON).
    if ( isNaN( sf ) || ( sf < 0 ) || ( sf > 1 ) ) {
      throw Error( 'winkNBTC: smoothing factor must be a number between 0 & 1, instead found: ' +
        ( ( typeof rawSF === 'number' ) ? String( rawSF ) : JSON.stringify( rawSF ) ) );
    }
    var normalized = Object.create( null );
    normalized.considerOnlyPresence = ( typeof cfg.considerOnlyPresence === 'boolean' ) ?
      cfg.considerOnlyPresence : false;
    normalized.smoothingFactor = sf;
    return normalized;
  }; // normalizeConfig()

  // #### Smoothed Log Ratio
  /**
   *
   * Computes the log base-2 of the (optionally smoothed) ratio of `wordCount`
   * to `totalWords`. It is shared by the likelihood and the inverse
   * likelihood computations.
   *
   * @param {number} wordCount number of occurrences of the word.
   * @param {number} totalWords total words in the class(es) being considered.
   * @return {number} log2 of the ratio; **0** if smoothing is disabled and
   * `wordCount` is 0.
   * @private
   */
  var smoothedLogRatio = function ( wordCount, totalWords ) {
    var sf = config.smoothingFactor;
    if ( sf > 0 ) {
      // Numerator will never be **0** due to smoothing.
      return ( Math.log2( wordCount + sf ) - Math.log2( totalWords + ( voc.size * sf ) ) );
    }
    // No smoothing: a zero numerator simply contributes **0**.
    if ( !wordCount ) return 0;
    // Non-zero numerator means normal handling; note the denominator still
    // includes the vocabulary size in this mode.
    return ( Math.log2( wordCount ) - Math.log2( totalWords + voc.size ) );
  }; // smoothedLogRatio()

  // #### Log Likelihood
  /**
   *
   * Computes the pre-definable smoothed log likelihood `( w | label )`.
   *
   * @param {string} w word or token.
   * @param {string} label i.e. class.
   * @return {number} smoothed log likelihood.
   * @private
   */
  var logLikelihood = function ( w, label ) {
    return smoothedLogRatio( ( count[ label ][ w ] || 0 ), words[ label ] );
  }; // logLikelihood()

  // #### Inverse Log Likelihood
  /**
   *
   * Computes the pre-definable smoothed inverse log likelihood `( w | label )`,
   * i.e. the likelihood of `w` under every label other than `label`.
   *
   * @param {string} w word or token.
   * @param {string} label i.e. class.
   * @return {number} smoothed inverse log likelihood.
   * @private
   */
  var inverseLogLikelihood = function ( w, label ) {
    // Index and temporary label.
    var i, l;
    // Sum of `count[ l ][ w ]` over all the other labels.
    var clw = 0;
    // Sum of `words[ l ]` over all the other labels.
    var wl = 0;
    for ( i = 0; i < labelCount; i += 1 ) {
      l = labels[ i ];
      if ( l !== label ) {
        wl += words[ l ];
        clw += ( count[ l ][ w ] || 0 );
      }
    }
    return smoothedLogRatio( clw, wl );
  }; // inverseLogLikelihood()

  // #### Odds
  /**
   *
   * Computes the (log base-2 of) odds for `( tokens | label )`.
   *
   * @param {string[]} tokens of the sentence.
   * @param {string} label i.e. class of sentence.
   * @return {number} odds for `( tokens | label )`; **0** if none of the
   * `tokens` is present in the vocabulary.
   * @private
   */
  var odds = function ( tokens, label ) {
    // Total number of samples encountered during training.
    var sum = 0;
    // Samples encountered under `label` during training.
    var samplesInLabel = samples[ label ];
    // Samples NOT encountered under the `label`.
    var samplesNotInLabel = 0;
    // Log Base 2 Likelihood & Inverse likelihood
    var lh = 0,
        ilh = 0;
    // Temp Label.
    var lbl, i, imax;
    // Filter unknown tokens.
    var ivTokens = tokens.filter( function ( e ) {
      return voc.has( e );
    } );
    // No known tokens means simply return **0**.
    if ( ivTokens.length === 0 ) return 0;
    // Compute `sum` & `samplesNotInLabel`.
    for ( i = 0; i < labelCount; i += 1 ) {
      lbl = labels[ i ];
      sum += samples[ lbl ];
      samplesNotInLabel += ( lbl === label ) ? 0 : samples[ lbl ];
    }
    // Update them for the given tokens for `label`
    for ( i = 0, imax = ivTokens.length; i < imax; i += 1 ) {
      lh += logLikelihood( ivTokens[ i ], label );
      // The inverse likelihood is skipped for as long as the accumulated `lh`
      // is still **0**, which can only happen when smoothing is disabled.
      // NOTE(review): the check uses the running total and not the current
      // token's likelihood, so in the no-smoothing mode the result depends on
      // token order; kept as-is to preserve existing scores - confirm intent.
      ilh += ( lh === 0 ) ? 0 : inverseLogLikelihood( ivTokens[ i ], label );
    }
    // Add prior probabilities only when `lh` (and therefore `ilh`) is non-zero,
    // i.e. at least one token contributed to the likelihood.
    if ( lh !== 0 ) {
      lh += ( Math.log2( samplesInLabel ) - Math.log2( sum ) );
      ilh += ( Math.log2( samplesNotInLabel ) - Math.log2( sum ) );
    }
    // Return the log likelihoods ratio; subtract as it is a log. This will
    // be a measure of distance between the probability & inverse probability.
    return ( lh - ilh );
  }; // odds()

  // ### Exposed Functions

  // #### Define Config
  /**
   *
   * Defines the configuration for naive bayes text classifier. This
   * must be called before attempting to [learn](#learn); in other words it can not be
   * set once learning has started. The configuration is left untouched if
   * the supplied `cfg` is found to be invalid.
   *
   * @method NaiveBayesTextClassifier#defineConfig
   * @param {object} cfg defines the configuration in terms of the following
   * parameters:
   * @param {boolean} [considerOnlyPresence=false] true indicates a binarized model.
   * @param {number} [smoothingFactor=1] defines the value for additive smoothing.
   * It can have any value between 0 and 1.
   * @return {boolean} Always true.
   * @example
   * myClassifier.defineConfig( { considerOnlyPresence: true, smoothingFactor: 0.5 } );
   * // -> true
   * @throws Error if `cfg` is not a valid Javascript object, or `smoothingFactor` is invalid,
   * or an attempt to define configuration is made after learning starts.
   */
  var defineConfig = function ( cfg ) {
    if ( learned ) {
      throw Error( 'winkNBTC: config must be defined before learning starts!' );
    }
    if ( !helpers.object.isObject( cfg ) ) {
      throw Error( 'winkNBTC: config must be an object, instead found: ' + ( typeof cfg ) );
    }
    // Validate everything first and only then replace the configuration, so
    // that a rejected `cfg` never results in a half-applied configuration.
    config = normalizeConfig( cfg );
    return true;
  }; // defineConfig()

  // #### Define Prep Tasks
  /**
   * Defines the text preparation `tasks` to transform raw incoming
   * text into tokens required during
   * [`learn()`](#learn), [`evaluate()`](#evaluate) and [`predict()`](#predict) operations.
   * The `tasks` should be an array of functions;
   * using these function a simple pipeline is built to serially transform the
   * input to the output. A single helper function for preparing text is available that (a) tokenizes,
   * (b) removes punctuations, symbols, numerals, URLs, stop words and (c) stems.
   *
   * @method NaiveBayesTextClassifier#definePrepTasks
   * @param {function[]} tasks the first function
   * in this array must accept a string as input and the last function must
   * return tokens i.e. array of strings. Please refer to example.
   * @return {number} The number of functions in `task` array.
   * @example
   * // Load wink NLP utilities
   * var prepText = require( 'wink-naive-bayes-text-classifier/src/prep-text.js' );
   * // Define the text preparation tasks.
   * myClassifier.definePrepTasks( [ prepText ] );
   * // -> 1
   * @throws Error if `tasks` is not an array of functions.
   */
  var definePrepTasks = function ( tasks ) {
    if ( !helpers.array.isArray( tasks ) ) {
      throw Error( 'winkNBTC: tasks should be an array, instead found: ' + JSON.stringify( tasks ) );
    }
    for ( var i = 0, imax = tasks.length; i < imax; i += 1 ) {
      if ( typeof tasks[ i ] !== 'function' ) {
        throw Error( 'winkNBTC: each task should be a function, instead found: ' + JSON.stringify( tasks[ i ] ) );
      }
    }
    pTasks = tasks;
    pTaskCount = tasks.length;
    return pTaskCount;
  }; // definePrepTasks()

  // #### Learn
  /**
   *
   * Learns from the example pair of `input` and its `label`. Nothing is
   * learned (and the classifier state is left untouched) if the `input`
   * can not be transformed into tokens.
   *
   * @method NaiveBayesTextClassifier#learn
   * @param {string|string[]} input if it is a string, then [`definePrepTasks()`](#definePrepTasks)
   * must be called before learning so that `input` string is transformed
   * into tokens on the fly.
   * @param {string} label of class to which `input` belongs.
   * @return {boolean} Always true.
   * @example
   * myClassifier.learn( 'I need loan for a new vehicle', 'autoloan' );
   * // -> true
   * @throws Error if learnings have been already [consolidated](#consolidate).
   * @throws TypeError if the prepared `input` is not a collection of tokens.
   */
  var learn = function ( input, label ) {
    // No point in learning further, if learnings so far have been consolidated.
    if ( consolidated ) {
      throw Error( 'winkNBTC: post consolidation learning is not possible!' );
    }
    // Prepare the input; binarize it if only the presence matters.
    var tkns = prepareInput( input );
    if ( config.considerOnlyPresence ) tkns = new Set( tkns );
    // Validate BEFORE mutating any state, so that a failed call neither
    // inflates the sample count nor locks the configuration.
    if ( !tkns || typeof tkns.forEach !== 'function' ) {
      throw TypeError( 'winkNBTC: input must be transformed into tokens for learning; ' +
        'either pass an array of tokens or call definePrepTasks() first.' );
    }
    // Set learning started.
    learned = true;
    // Update vocabulary, count, and words i.e. learn!
    samples[ label ] = 1 + ( samples[ label ] || 0 );
    count[ label ] = count[ label ] || Object.create( null );
    // Ensure a numeric word count even if `tkns` is empty; an `undefined`
    // here would turn the odds of every label into NaN during prediction.
    words[ label ] = words[ label ] || 0;
    tkns.forEach( function ( token ) {
      count[ label ][ token ] = 1 + ( count[ label ][ token ] || 0 );
      voc.add( token );
      words[ label ] += 1;
    } );
    return true;
  }; // learn()

  // #### Consolidate
  // Consolidates the learnings in following steps:
  // 1. Check presence of minimal learning mass, if present proceed further;
  // otherwise it throws appropriate error.
  // 2. Initializes the confusion matrix and metrics.
  /**
   *
   * Consolidates the learning. It is a prerequisite for [`evaluate()`](#evaluate)
   * and/or [`predict()`](#predict).
   *
   * @method NaiveBayesTextClassifier#consolidate
   * @return {boolean} Always true.
   * @example
   * myClassifier.consolidate();
   * // -> true
   * @throws Error if training data belongs to only a single class label or
   * the training data is too small for learning.
   */
  var consolidate = function () {
    var row, col;
    var i, j;
    // Extract all labels that have been seen during learning phase.
    labels = helpers.object.keys( samples );
    labelCount = labels.length;
    // A quick & simple check of some minimal learning mass!
    if ( labelCount < 2 ) {
      throw Error( 'winkNBTC: can not consolidate as classification require 2 or more labels!' );
    }
    if ( voc.size < 10 ) {
      throw Error( 'winkNBTC: vocabulary is too small to learn meaningful classification!' );
    }
    // Initialize confusion matrix and metrics.
    for ( i = 0; i < labelCount; i += 1 ) {
      row = labels[ i ];
      cm[ row ] = Object.create( null );
      precision[ row ] = 0;
      recall[ row ] = 0;
      fmeasure[ row ] = 0;
      for ( j = 0; j < labelCount; j += 1 ) {
        col = labels[ j ];
        cm[ row ][ col ] = 0;
      }
    }
    // Set `consolidated` as `true`.
    consolidated = true;
    return true;
  }; // consolidate()

  // #### Compute Odds
  /**
   * Computes the log base-2 of odds of every label for the input; and returns
   * the array of `[ label, odds ]` in descending order of odds. If the odds
   * of the top label turn out to be 0 (e.g. none of the tokens has been seen
   * during learning), it returns `[ [ 'unknown', 0 ] ]`.
   *
   * @method NaiveBayesTextClassifier#computeOdds
   * @param {String|String[]} input is either text or tokens determined by the
   * choice of [`preparatory tasks`](#definePrepTasks).
   * @return {array[]} Array of `[ label, odds ]` in descending order of odds.
   * @example
   * myClassifier.computeOdds( 'I want to pay my car loan early' );
   * // -> [
   * //      [ 'prepay', 6.169686751688911 ],
   * //      [ 'autoloan', -6.169686751688911 ]
   * //    ]
   * @throws Error if the learnings have not been [consolidated](#consolidate).
   * @throws TypeError if the prepared `input` is not an array of tokens.
   */
  var computeOdds = function ( input ) {
    // Predict only if learnings have been consolidated!
    if ( !consolidated ) {
      throw Error( 'winkNBTC: prediction is not possible unless learnings are consolidated!' );
    }
    // Prepare the input just once; it is identical for every label.
    var tokens = prepareInput( input );
    if ( !tokens || typeof tokens.filter !== 'function' ) {
      throw TypeError( 'winkNBTC: input must be transformed into tokens for prediction; ' +
        'either pass an array of tokens or call definePrepTasks() first.' );
    }
    // Contains label & the corresponding odds pairs.
    var allOdds = [];
    // Temporary label.
    var label;
    for ( var i = 0; i < labelCount; i += 1 ) {
      label = labels[ i ];
      allOdds.push( [ label, odds( tokens, label ) ] );
    }
    // Sort descending for argmax.
    allOdds.sort( helpers.array.descendingOnValue );
    // If odds for the top label is 0 means prediction is `unknown`
    // otherwise return all the label/odds pairs.
    return ( ( allOdds[ 0 ][ 1 ] ) ? allOdds : [ [ unknown, 0 ] ] );
  }; // computeOdds()

  // #### Predict
  /**
   *
   * Predicts the class label for the `input`. If it is unable to predict then it
   * returns a value **`unknown`**; this happens, for example, when none of the
   * `input` tokens has been seen during learning.
   *
   * @method NaiveBayesTextClassifier#predict
   * @param {String|String[]} input is either text or tokens determined by the
   * choice of [`preparatory tasks`](#definePrepTasks).
   * @return {String} The predicted class label for the `input`.
   * @example
   * myClassifier.predict( 'I want to pay my car loan early' );
   * // -> prepay
   * @throws Error if the learnings have not been [consolidated](#consolidate).
   */
  var predict = function ( input ) {
    // The first pair carries the label with the highest odds.
    var allOdds = computeOdds( input );
    return ( allOdds[ 0 ][ 0 ] );
  }; // predict()

  // #### Stats
  /**
   * Returns basic stats of learning in terms of count of samples under
   * each label, total words, and the size of vocabulary.
   *
   * @method NaiveBayesTextClassifier#stats
   * @return {object} An object containing count of samples under
   * each label, total words, and the size of vocabulary.
   * @example
   * myClassifier.stats();
   * // -> {
   * //      labelWiseSamples: {
   * //        autoloan: 5,
   * //        prepay: 4
   * //      },
   * //      labelWiseWords: {
   * //        autoloan: 36,
   * //        prepay: 26
   * //      },
   * //      vocabulary: 24
   * //    };
   */
  var stats = function () {
    return (
      {
        // Count of samples under each label (a copy, not the live object).
        labelWiseSamples: JSON.parse( JSON.stringify( samples ) ),
        // Total words (a single word occurring twice is counted as 2)
        // under each label.
        labelWiseWords: JSON.parse( JSON.stringify( words ) ),
        // Size of the vocabulary.
        vocabulary: voc.size
      }
    );
  }; // stats()

  // #### Export JSON
  // Returns the learnings, without any consolidation check, in JSON format.
  /**
   * Exports the learning as a JSON, which may be saved as a text file for
   * later use via [`importJSON()`](#importjson).
   *
   * @method NaiveBayesTextClassifier#exportJSON
   * @return {string} Learning in JSON format.
   * @example
   * myClassifier.exportJSON();
   * // returns JSON.
   */
  var exportJSON = function ( ) {
    var vocArray = [];
    // Vocabulary set needs to be converted to an array.
    voc.forEach( function ( e ) {
      vocArray.push( e );
    } );
    return ( JSON.stringify( [ config, samples, count, words, vocArray ] ) );
  }; // exportJSON()

  // #### Reset
  /**
   * It completely resets the classifier by re-initializing all the learning
   * related variables, except the preparatory tasks. It is useful during
   * cross fold-validation.
   *
   * @method NaiveBayesTextClassifier#reset
   * @return {boolean} Always true.
   * @example
   * myClassifier.reset();
   * // -> true
   */
  var reset = function () {
    // Reset values of variables that are associated with learning; Therefore
    // `pTasks` & `pTaskCount` are not re-initialized. The `config` is
    // retained as well.
    samples = Object.create( null );
    count = Object.create( null );
    words = Object.create( null );
    voc = new Set();
    labels = null;
    labelCount = 0;
    learned = false;
    consolidated = false;
    evaluated = false;
    cm = Object.create( null );
    precision = Object.create( null );
    recall = Object.create( null );
    fmeasure = Object.create( null );
    return true;
  }; // reset()

  // #### Import JSON
  // Imports the `json` in to learnings after validating the format of input JSON.
  // If validation fails then throws error; otherwise on success import it
  // returns `true`. Note, importing leads to resetting the classifier.
  /**
   * Imports an existing JSON learning for prediction.
   * It is essential to [`definePrepTasks()`](#definepreptasks) and
   * [`consolidate()`](#consolidate) before attempting to predict.
   * The existing learnings are discarded only after the `json` has been
   * validated successfully.
   *
   * @method NaiveBayesTextClassifier#importJSON
   * @param {JSON} json containing learnings in as exported by [`exportJSON`](#exportjson).
   * @return {boolean} Always true.
   * @throws Error if `json` is invalid.
   */
  var importJSON = function ( json ) {
    var invalidJSON = 'winkNBTC: invalid JSON encountered, can not import.';
    if ( !json ) {
      throw Error( 'winkNBTC: undefined or null JSON encountered, import failed!' );
    }
    // Validate json format: `[ config, samples, count, words, vocabulary ]`.
    var isOK = [
      helpers.object.isObject,
      helpers.object.isObject,
      helpers.object.isObject,
      helpers.object.isObject,
      helpers.array.isArray
    ];
    var parsedJSON = JSON.parse( json );
    if ( !helpers.array.isArray( parsedJSON ) || parsedJSON.length !== isOK.length ) {
      throw Error( invalidJSON );
    }
    for ( var i = 0; i < isOK.length; i += 1 ) {
      if ( !isOK[ i ]( parsedJSON[ i ] ) ) {
        throw Error( invalidJSON );
      }
    }
    // Re-build every structure as a prototype-less dictionary before touching
    // the current state: `JSON.parse()` produces ordinary objects, on which a
    // lookup such as `count[ label ][ 'constructor' ]` returns an inherited
    // function instead of `undefined` and corrupts the likelihoods.
    var importedConfig = normalizeConfig( parsedJSON[ 0 ] );
    var importedSamples = toDictionary( parsedJSON[ 1 ] );
    var importedWords = toDictionary( parsedJSON[ 3 ] );
    var importedCount = Object.create( null );
    Object.keys( parsedJSON[ 2 ] ).forEach( function ( label ) {
      if ( !helpers.object.isObject( parsedJSON[ 2 ][ label ] ) ) {
        throw Error( invalidJSON );
      }
      importedCount[ label ] = toDictionary( parsedJSON[ 2 ][ label ] );
    } );
    // A label learned only from empty inputs may lack `count`/`words` entries
    // in the exported JSON; fill them so that the likelihoods stay numeric.
    Object.keys( importedSamples ).forEach( function ( label ) {
      importedCount[ label ] = importedCount[ label ] || Object.create( null );
      importedWords[ label ] = importedWords[ label ] || 0;
    } );
    // All good, setup variable values.
    // First reset everything.
    reset();
    // To prevent config change.
    learned = true;
    // Load variable values.
    config = importedConfig;
    samples = importedSamples;
    count = importedCount;
    words = importedWords;
    // Vocabulary is a set!
    voc = new Set( parsedJSON[ 4 ] );
    // Return success.
    return true;
  }; // importJSON()

  // #### Evaluate
  /**
   *
   * Evaluates the learning against a test data set.
   * The `input` is used to predict the class label, which is compared with the
   * actual class `label` to populate confusion matrix incrementally.
   *
   * @method NaiveBayesTextClassifier#evaluate
   * @param {String|String[]} input is either text or tokens determined by the
   * choice of [`preparatory tasks`](#definePrepTasks).
   * @param {string} label of class to which `input` belongs.
   * @return {boolean} `true` if the confusion matrix was updated; `false` if
   * the prediction was `unknown`, in which case the matrix is left untouched.
   * @example
   * myClassifier.evaluate( 'can i close my loan', 'prepay' );
   * // -> true
   * @throws Error if the `label` has not been seen during learning, or if the
   * learnings have not been [consolidated](#consolidate).
   */
  var evaluate = function ( input, label ) {
    // In case of unknown label, indicate failure
    if ( !samples[ label ] ) {
      throw Error( 'winkNBTC: can not evaluate, unknown label enountered: ' + JSON.stringify( label ) );
    }
    var prediction = predict( input );
    // If prediction failed then return false!
    if ( prediction === unknown ) return false;
    // Update confusion matrix: rows are predictions & columns are actuals.
    cm[ prediction ][ label ] += 1;
    evaluated = true;
    return true;
  }; // evaluate()

  // #### Metrics
  // Computes the metrics from the confusion matrix built during the evaluation
  // phase via `evaluate()`. In absence of evaluations, it throws error; otherwise
  // it returns an object containing summary metrics along with the details.
  /**
   *
   * Computes a detailed metrics consisting of macro-averaged precision, recall
   * and f-measure along with their label-wise values and the confusion matrix.
   *
   * @method NaiveBayesTextClassifier#metrics
   * @return {object} Detailed metrics.
   * @example
   * // Assuming that evaluation has been already carried out
   * JSON.stringify( myClassifier.metrics(), null, 2 );
   * // -> {
   * //      "avgPrecision": 0.75,
   * //      "avgRecall": 0.75,
   * //      "avgFMeasure": 0.6667,
   * //      "details": {
   * //        "confusionMatrix": {
   * //          "prepay": {
   * //            "prepay": 1,
   * //            "autoloan": 1
   * //          },
   * //          "autoloan": {
   * //            "prepay": 0,
   * //            "autoloan": 1
   * //          }
   * //        },
   * //        "precision": {
   * //          "prepay": 0.5,
   * //          "autoloan": 1
   * //        },
   * //        "recall": {
   * //          "prepay": 1,
   * //          "autoloan": 0.5
   * //        },
   * //        "fmeasure": {
   * //          "prepay": 0.6667,
   * //          "autoloan": 0.6667
   * //        }
   * //      }
   * //    }
   * @throws Error if attempt to generate metrics is made prior to proper evaluation.
   */
  var metrics = function () {
    if ( !evaluated ) {
      throw Error( 'winkNBTC: metrics can not be computed before evaluation.' );
    }
    // Numerators for every label; they are same for precision & recall both.
    var n = Object.create( null );
    // Only denominators differs for precision & recall
    var pd = Object.create( null );
    var rd = Object.create( null );
    // `row` and `col` of confusion matrix.
    var row, col;
    var i, j;
    // Macro average values for metrics.
    var avgPrecision = 0;
    var avgRecall = 0;
    var avgFMeasure = 0;
    // Compute label-wise numerators & denominators!
    for ( i = 0; i < labelCount; i += 1 ) {
      row = labels[ i ];
      for ( j = 0; j < labelCount; j += 1 ) {
        col = labels[ j ];
        // The diagonal holds the correct predictions.
        if ( row === col ) {
          n[ row ] = cm[ row ][ col ];
        }
        // Row total: everything that was predicted as `row`.
        pd[ row ] = cm[ row ][ col ] + ( pd[ row ] || 0 );
        // Column total: everything that actually was `row`.
        rd[ row ] = cm[ col ][ row ] + ( rd[ row ] || 0 );
      }
    }
    // Ready to compute metrics.
    for ( i = 0; i < labelCount; i += 1 ) {
      row = labels[ i ];
      precision[ row ] = +( n[ row ] / pd[ row ] ).toFixed( 4 );
      // NaN can occur if a label has not been encountered.
      if ( isNaN( precision[ row ] ) ) precision[ row ] = 0;
      recall[ row ] = +( n[ row ] / rd[ row ] ).toFixed( 4 );
      if ( isNaN( recall[ row ] ) ) recall[ row ] = 0;
      fmeasure[ row ] = +( 2 * precision[ row ] * recall[ row ] / ( precision[ row ] + recall[ row ] ) ).toFixed( 4 );
      if ( isNaN( fmeasure[ row ] ) ) fmeasure[ row ] = 0;
    }
    // Compute their averages, note they will be macro averages.
    for ( i = 0; i < labelCount; i += 1 ) {
      avgPrecision += ( precision[ labels[ i ] ] / labelCount );
      avgRecall += ( recall[ labels[ i ] ] / labelCount );
      avgFMeasure += ( fmeasure[ labels[ i ] ] / labelCount );
    }
    // Return metrics.
    return (
      {
        // Macro-averaged metrics.
        avgPrecision: +avgPrecision.toFixed( 4 ),
        avgRecall: +avgRecall.toFixed( 4 ),
        avgFMeasure: +avgFMeasure.toFixed( 4 ),
        details: {
          // Confusion Matrix.
          confusionMatrix: cm,
          // Label wise metrics details, from those averages were computed.
          precision: precision,
          recall: recall,
          fmeasure: fmeasure
        }
      }
    );
  }; // metrics()

  methods.learn = learn;
  methods.consolidate = consolidate;
  methods.computeOdds = computeOdds;
  methods.predict = predict;
  methods.stats = stats;
  methods.definePrepTasks = definePrepTasks;
  methods.defineConfig = defineConfig;
  methods.evaluate = evaluate;
  methods.metrics = metrics;
  methods.exportJSON = exportJSON;
  methods.importJSON = importJSON;
  methods.reset = reset;
  return ( methods );
}; // naiveBayesTextClassifier()
// Export the naive bayes text classifier factory function.
module.exports = naiveBayesTextClassifier;