JSDoc: Source: lexRank.js

/* LexRank for Text Summarization:
LexRank is a graph-based summarization method inspired by the PageRank algorithm used in web search engines.
It constructs a graph representation of the text, where vertices represent sentences, and edges represent semantic similarity between sentences.
The algorithm then computes a centrality score for each sentence, indicating its importance within the text.
Highly scored sentences are selected to form the summary, capturing the most salient information.
*/

const natural = require("natural");
const SentenceTokenizer = natural.SentenceTokenizer;
const TfIdf = natural.TfIdf;
const { manageErrors } = require("./errors.js");
const {
  getSentimentRankAdjustment,
  calculateAdjustedRank,
  getTfIdfVectors,
  cosineSimilarity,
} = require("./shared.js");
const { getSentiment } = require("./sentimentAnalysis.js");

/**
 * Generates a sentiment-aware summary using the LexRank algorithm. Prioritizes sentences with strong positive or negative sentiment.
 *
 * @param {string} text - The input text for summarization.
 * @param {number} [numberOfSentences=5] -  Desired number of sentences in the summary.
 * @param {number} [positiveSentimentThreshold=0] - Minimum sentiment score to consider a sentence positive.
 * @param {number} [negativeSentimentThreshold=0] - Maximum sentiment score to consider a sentence negative.
 * @param {number} [positiveRankBoost=0] - Boost applied to the ranking of positive sentences.
 * @param {number} [negativeRankBoost=0] - Boost applied to the ranking of negative sentences.
 * @returns {string} The generated summary.
 * @throws {Error} If any input parameters are invalid (delegated to 'manageErrors').
 *
 * @example
 *
 * const review = "The food was delicious! The service was slow, but overall it was a great experience.";
 * const summary = await sentimentLexRankSummary(review, 2);
 * console.log(summary);
 */

async function sentimentLexRankSummary(
  text,
  numberOfSentences = 5,
  positiveSentimentThreshold = 0,
  negativeSentimentThreshold = 0,
  positiveRankBoost = 0,
  negativeRankBoost = 0
) {
  manageErrors(
    text,
    numberOfSentences,
    positiveSentimentThreshold,
    negativeSentimentThreshold,
    positiveRankBoost,
    negativeRankBoost
  );

  // Tokenize the text into sentences
  const sentenceTokenizer = new SentenceTokenizer();
  let sentences = sentenceTokenizer.tokenize(text);

  // Calculate LexRank scores
  let sentenceDetails = lexRankSentences(sentences);

  // Compute sentiments and adjust ranks asynchronously
  const sentimentDetails = await Promise.all(
    sentenceDetails.map(async (details) => {
      const sentimentScore = await getSentiment(details.sentence);

      const sentimentRankAdjustment = getSentimentRankAdjustment(
        sentimentScore,
        positiveSentimentThreshold,
        negativeSentimentThreshold,
        positiveRankBoost,
        negativeRankBoost
      );

      return {
        ...details,
        rank: calculateAdjustedRank(details.rank, sentimentRankAdjustment),
      };
    })
  );

  // Sort sentences by the modified rank
  sentimentDetails.sort((a, b) => b.rank - a.rank);

  // Select the top N sentences and return them as a single string
  return sentimentDetails
    .slice(0, numberOfSentences)
    .map((details) => details.sentence)
    .join(" ");
}

/**
 * Computes LexRank scores for sentences to determine their importance within a text.
 *
 * @param {string[]} sentences - An array of sentences from the input text.
 * @returns {object[]} Array of objects, each containing:
 *   * sentence: The original sentence.
 *   * rank: The calculated LexRank importance score.
 */

function lexRankSentences(sentences) {
  let tfidf = new TfIdf();
  sentences.forEach((sentence) => tfidf.addDocument(sentence));
  let vectors = getTfIdfVectors(tfidf); // Ensure this returns a proper list of vectors

  // Compute the cosine similarity matrix
  let similarityMatrix = vectors.map((vecA) =>
    vectors.map((vecB) => cosineSimilarity(vecA, vecB))
  );

  let threshold = calculateThreshold(similarityMatrix);

  // Apply threshold to similarity matrix and calculate degrees
  let degrees = Array(sentences.length).fill(0);
  similarityMatrix = similarityMatrix.map((row, i) =>
    row.map((score, j) => {
      if (score > threshold && i !== j) {
        degrees[i]++;
        return 1;
      }
      return 0;
    })
  );

  // Check if any degrees are zero and adjust them to avoid division by zero
  degrees = degrees.map((degree) => (degree === 0 ? 1 : degree));

  // Use power method to approximate principal eigenvector
  let ranks = powerMethod(similarityMatrix, degrees, 0.85, 100);

  return sentences.map((sentence, index) => ({
    sentence,
    rank: ranks[index],
  }));
}

/**
 * Calculates a similarity threshold for building the LexRank graph.
 *
 * @param {number[][]} similarityMatrix - A matrix representing the cosine similarities between sentences.
 * @returns {number} The average similarity score, used as a threshold.
 */

function calculateThreshold(similarityMatrix) {
  let scores = similarityMatrix.flat();
  return scores.reduce((a, b) => a + b, 0) / scores.length;
}

/**
 * Approximates the principal eigenvector (representing sentence importance) of a graph using the power iteration method.
 *
 * @param {number[][]} matrix -  Represents a graph, often an adjacency or similarity matrix.
 * @param {number[]} degrees  - Represents the out-degrees of each node in the graph.
 * @param {number} [damping=0.85]  - Damping factor to prevent oscillations (probability of a 'random jump').
 * @param {number} [maxIter=100] -  Maximum number of iterations for convergence.
 * @returns {number[]}  An array of scores approximating the importance of each node (sentence).
 */

function powerMethod(matrix, degrees, damping = 0.85, maxIter = 100) {
  let N = matrix.length;
  let p = Array(N).fill(1 / N); // Start with an equal probability for each node
  for (let iter = 0; iter < maxIter; iter++) {
    let newP = Array(N).fill((1 - damping) / N); // Ensure the random teleportation is uniformly distributed
    for (let i = 0; i < N; i++) {
      for (let j = 0; j < N; j++) {
        newP[i] += ((damping * matrix[j][i]) / degrees[j]) * p[j];
      }
    }
    p = newP;
  }
  return p;
}

module.exports = { sentimentLexRankSummary };