import React from 'react';

import excerpt from '../../utils/excerpt';
import reportError from '../../utils/sentry';
import type { HighlightResult } from '../behavior';

/**
 * Matches the highlight boundaries in a highlighted string: a literal opening
 * `<em>` or closing `</em>` tag (lowercase, no attributes).
 *
 * Algolia highlights matched terms in strings using <em> tags. We don't want to
 * use the raw HTML string they give us. First, that would require us to fully
 * trust what's coming from Algolia and use dangerouslySetInnerHTML [1]. There'd
 * be nothing technical preventing Algolia from injecting arbitrary content,
 * including scripts, into our pages. Second, we're unable to do any further
 * processing of the raw string. Instead, we extract the boundaries so that we
 * can separately manipulate the matched terms and their surrounding context.
 *
 * The `g` flag makes `replace` and `split` act on every tag rather than only
 * the first one. A global regex keeps `lastIndex` state between `test()` and
 * `exec()` calls; the uses visible in this module (`replace` in `unhighlight`
 * and `split` in `splitMatches`) do not depend on that state.
 *
 * [1]: https://reactjs.org/docs/dom-elements.html#dangerouslysetinnerhtml
 */
const highlightBoundaryRegex = /<\/?em>/gu;

/**
 * Recovers the plain text of a highlighted string by dropping every `<em>` and
 * `</em>` boundary tag and keeping everything in between.
 *
 * @param highlighted - A string that may contain highlight boundary tags.
 * @returns The same text with all boundary tags removed.
 */
export function unhighlight(highlighted: string): string {
	// Cutting the string at each boundary tag and gluing the pieces back
	// together leaves only the visible text.
	const pieces = highlighted.split(highlightBoundaryRegex);
	return pieces.join('');
}

/** Props shared by the `Highlight` and `HighlightExcerpt` components. */
type HighlightProps = {
	/** The highlighted string, with matched terms wrapped in <em> tags. */
	readonly string: string;
};

/**
 * Renders a highlighted string in full, emphasizing every matched term.
 *
 * @param props - Component props; `props.string` is the highlighted string
 * whose matched terms are wrapped in <em> tags.
 * @returns The string's text with each matched term emphasized.
 */
export default function Highlight(props: HighlightProps): JSX.Element {
	const parts = splitMatches(props.string);
	return <HighlightParts parts={parts} />;
}

/**
 * Renders contextual excerpts of a highlighted string, emphasizing every
 * matched term that remains in the excerpts.
 *
 * @param props - Component props; `string` is the highlighted string whose
 * matched terms are wrapped in <em> tags.
 * @returns The excerpted text, or the full highlighted text if excerpting
 * throws.
 */
export function HighlightExcerpt({ string }: HighlightProps): JSX.Element {
	const allParts = splitMatches(string);

	let shownParts: Array<string>;
	try {
		shownParts = excerpt(allParts);
	} catch (error) {
		// Excerpting is best-effort: report the exception and fall back to
		// showing the full highlighted string.
		reportError(error);
		shownParts = allParts;
	}

	return <HighlightParts parts={shownParts} />;
}

/**
 * Cuts a highlighted string at every <em> / </em> boundary tag.
 *
 * The pieces alternate: those at even indexes are ordinary text, and those at
 * odd indexes matched the search and should be highlighted. This relies on
 * the opening and closing tags being properly paired.
 *
 * @private
 */
function splitMatches(string: string): Array<string> {
	// This is not an attempt to parse HTML with a regular expression. It only
	// looks for the two exact boundary tags, which is fine as long as Algolia
	// doesn't lie to us.
	const parts = string.split(highlightBoundaryRegex);
	return parts;
}

/** Props for the private `HighlightParts` component. */
type HighlightPartsProps = {
	/**
	 * Alternating pieces of text: even indexes are rendered as ordinary text,
	 * odd indexes are rendered as matches.
	 */
	readonly parts: Array<string>;
};

/**
 * Renders alternating text pieces: pieces at odd indexes are wrapped in
 * <strong>, pieces at even indexes are rendered as plain text.
 *
 * @private
 */
function HighlightParts({ parts }: HighlightPartsProps) {
	const nodes = parts.map((text, position) => {
		// The index alone would already be unique among siblings; the text is
		// appended as well, matching the existing key format.
		const key = `${position}/${text}`;
		const isMatch = position % 2 === 1;
		return isMatch ? (
			<strong key={key}>{text}</strong>
		) : (
			<React.Fragment key={key}>{text}</React.Fragment>
		);
	});

	return <React.Fragment>{nodes}</React.Fragment>;
}

/**
 * Picks the value of the strongest match from a list of highlight results.
 *
 * Results whose match level is 'none' are ignored. The remaining results are
 * ranked by match level, then by number of matched words, then by visible
 * length, and the top-ranked result's value is returned.
 *
 * @param results - The highlight results to choose from, if any.
 * @param fallback - Returned when `results` is not an array or contains no
 * result with a match level other than 'none'.
 * @returns The highlighted value of the best match, or `fallback`.
 */
export function getBestHighlightResultValue(
	results: Array<HighlightResult> | undefined,
	fallback: string,
): string {
	if (!Array.isArray(results)) {
		return fallback;
	}

	// `filter` returns a new array, so sorting it in place below leaves the
	// caller's array untouched.
	const candidates = results.filter(
		(candidate) => candidate.matchLevel !== 'none',
	);
	if (candidates.length === 0) {
		return fallback;
	}

	candidates.sort(compareHighlightResults);
	return candidates[0].value;
}

/**
 * Sort comparator that orders stronger highlight results first. A negative
 * return value sorts `a` before `b`; a positive one sorts `b` before `a`.
 *
 * @private
 */
function compareHighlightResults(
	a: HighlightResult,
	b: HighlightResult,
): number {
	// A full match always beats a partial match.
	if (a.matchLevel === 'full' && b.matchLevel === 'partial') {
		return -1;
	}
	if (a.matchLevel === 'partial' && b.matchLevel === 'full') {
		return 1;
	}

	// With match levels tied, the result that matched more words wins.
	const aWordCount = a.matchedWords.length;
	const bWordCount = b.matchedWords.length;
	if (aWordCount !== bWordCount) {
		return bWordCount - aWordCount;
	}

	// Still tied: prefer the shortest result, which will have the highest
	// term frequency-inverse document frequency. Length is measured after
	// removing Algolia's <em>...</em> tags, which aren't part of the visible
	// text.
	const aVisibleLength = unhighlight(a.value).length;
	const bVisibleLength = unhighlight(b.value).length;
	return aVisibleLength - bVisibleLength;
}
