| import { visit } from 'unist-util-visit'; |
| import type { Node } from 'unist'; |
| import type { Citation, CitationNode } from './types'; |
| import { SPAN_REGEX, STANDALONE_PATTERN, CLEANUP_REGEX, COMPOSITE_REGEX } from '~/utils/citations'; |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Reports whether the marker found at `position` lies outside an open
 * \ue200 … \ue201 bracket pair.
 *
 * Only the text before `position` is inspected. Both brackets are recognised
 * in two spellings: the real private-use character and the escaped
 * six-character sequence (backslash + "ue200" / "ue201").
 *
 * @param text - Full text being scanned.
 * @param position - Start offset of the marker under test.
 * @returns `true` when no opening bracket precedes `position`, or when the
 *   most recent closing bracket comes after the most recent opening bracket.
 */
function isStandaloneMarker(text: string, position: number): boolean {
  const prefix = text.substring(0, position);

  // Latest occurrence of a bracket in either spelling, or -1 when absent.
  const lastOf = (escaped: string, raw: string): number =>
    Math.max(prefix.lastIndexOf(escaped), prefix.lastIndexOf(raw));

  const opener = lastOf('\\ue200', '\ue200');
  if (opener === -1) {
    // Nothing was ever opened before this point.
    return true;
  }

  // An opener exists, so the marker is standalone only if that opener has
  // already been closed again.
  const closer = lastOf('\\ue201', '\ue201');
  return closer !== -1 && closer > opener;
}
|
|
| |
| |
| |
/**
 * Finds the earliest citation marker in `text` at or after `position`.
 *
 * Three kinds are probed: `span` (SPAN_REGEX), `composite` (COMPOSITE_REGEX)
 * and `standalone` (STANDALONE_PATTERN). A standalone hit is only accepted
 * when `isStandaloneMarker` approves its offset; rejected hits are skipped and
 * the search continues from where the pattern left off.
 *
 * NOTE(review): the scan start is set through `lastIndex`, which only takes
 * effect on global or sticky patterns. The imported regexes are assumed to
 * carry the `g` flag; confirm in `~/utils/citations`.
 *
 * @param text - Text to scan.
 * @param position - Offset at which scanning starts.
 * @returns The winning match with its kind, or `null` when nothing matches.
 *   On equal offsets the priority is span, then composite, then standalone.
 *   The `index` field is always `0`.
 */
function findNextMatch(
  text: string,
  position: number,
): { type: string; match: RegExpExecArray | null; index: number } | null {
  // The shared patterns are stateful, so aim each one at the scan start.
  SPAN_REGEX.lastIndex = position;
  COMPOSITE_REGEX.lastIndex = position;
  STANDALONE_PATTERN.lastIndex = position;

  const spanHit = SPAN_REGEX.exec(text);
  const compositeHit = COMPOSITE_REGEX.exec(text);

  // Walk standalone hits until one passes the standalone check.
  let standaloneHit: RegExpExecArray | null = null;
  for (
    let probe = STANDALONE_PATTERN.exec(text);
    probe !== null;
    probe = STANDALONE_PATTERN.exec(text)
  ) {
    if (isStandaloneMarker(text, probe.index)) {
      standaloneHit = probe;
      break;
    }
  }

  // Listed in priority order; the strict comparison below lets the earlier
  // entry win a tie.
  const candidates: Array<{ type: string; match: RegExpExecArray | null }> = [
    { type: 'span', match: spanHit },
    { type: 'composite', match: compositeHit },
    { type: 'standalone', match: standaloneHit },
  ];

  let best: { type: string; match: RegExpExecArray } | null = null;
  for (const { type, match } of candidates) {
    if (match && (best === null || match.index < best.match.index)) {
      best = { type, match };
    }
  }

  if (best === null) {
    return null;
  }
  return { type: best.type, match: best.match, index: 0 };
}
|
|
/**
 * Walks every `text` node of `tree` and replaces citation markers with
 * dedicated nodes.
 *
 * Each text value is scanned left to right with `findNextMatch`. Plain text
 * between markers is kept (after stripping CLEANUP_REGEX matches) and each
 * marker becomes one of:
 *  - `highlighted-text` for a span, whose `citationId` points at the citation
 *    that follows within fewer than 5 characters, or is `null`;
 *  - `composite-citation` for a composite, carrying every reference parsed
 *    from it with STANDALONE_PATTERN (emitted only if at least one parses);
 *  - `citation` for a standalone reference.
 *
 * Citation ids have the form `<type>-<count of that type so far>-<offset>`.
 *
 * When segments were produced and the node has a parent with a `children`
 * array, the text node is replaced in place and the visitor returns the index
 * just past the inserted nodes so they are not visited again. Otherwise the
 * node's value is only stripped of CLEANUP_REGEX matches.
 *
 * @param tree - Root of the syntax tree; mutated in place.
 */
function processTree(tree: Node) {
  visit(tree, 'text', (node, index, parent) => {
    const textNode = node as CitationNode;
    // The visitor receives no parent for the root node.
    const parentNode = parent as CitationNode | undefined;

    if (typeof textNode.value !== 'string') return;

    const originalValue = textNode.value;
    const segments: Array<CitationNode> = [];
    let currentPosition = 0;

    // Per-type counters that feed the generated citation ids.
    const typeCounts = { span: 0, composite: 0, standalone: 0 };

    while (currentPosition < originalValue.length) {
      const nextMatchInfo = findNextMatch(originalValue, currentPosition);
      const match = nextMatchInfo?.match;

      if (!nextMatchInfo || !match) {
        // No more markers: keep whatever text is left, minus stray markers.
        const remainingText = originalValue.substring(currentPosition).replace(CLEANUP_REGEX, '');
        if (remainingText) {
          segments.push({ type: 'text', value: remainingText });
        }
        break;
      }

      const { type } = nextMatchInfo;
      const matchIndex = match.index;
      const matchText = match[0];

      // Plain text between the previous marker and this one.
      if (matchIndex > currentPosition) {
        const textBeforeMatch = originalValue
          .substring(currentPosition, matchIndex)
          .replace(CLEANUP_REGEX, '');

        if (textBeforeMatch) {
          segments.push({ type: 'text', value: textBeforeMatch });
        }
      }

      const citationId = `${type}-${typeCounts[type as keyof typeof typeCounts]}-${matchIndex}`;

      switch (type) {
        case 'span': {
          // Drop the span delimiters in both spellings (escaped sequence and
          // real private-use character), matching how isStandaloneMarker
          // treats the bracket markers.
          const cleanText = matchText.replace(/\\ue203|\\ue204|\ue203|\ue204/g, '');

          let associatedCitationId: string | null = null;
          const endOfSpan = matchIndex + matchText.length;

          // Link the span to the citation that immediately follows it. The id
          // is rebuilt with the same formula the main loop will use when it
          // reaches that citation; its counter cannot change before then
          // because only the span counter is incremented here.
          const nextCitation = findNextMatch(originalValue, endOfSpan);
          const nextCitationMatch = nextCitation?.match;
          if (
            nextCitation &&
            nextCitationMatch &&
            (nextCitation.type === 'standalone' || nextCitation.type === 'composite') &&
            nextCitationMatch.index - endOfSpan < 5
          ) {
            const nextIndex = nextCitationMatch.index;
            const nextType = nextCitation.type;
            associatedCitationId = `${nextType}-${typeCounts[nextType as keyof typeof typeCounts]}-${nextIndex}`;
          }

          segments.push({
            type: 'highlighted-text',
            data: {
              hName: 'highlighted-text',
              hProperties: { citationId: associatedCitationId },
            },
            children: [{ type: 'text', value: cleanText }],
          });

          typeCounts.span++;
          break;
        }

        case 'composite': {
          // Fresh regex so scanning inside the composite does not disturb the
          // shared pattern's lastIndex.
          const compositeRefRegex = new RegExp(STANDALONE_PATTERN.source, 'g');
          let refMatch: RegExpExecArray | null;
          const citations: Array<Citation> = [];

          while ((refMatch = compositeRefRegex.exec(matchText)) !== null) {
            citations.push({
              turn: Number(refMatch[1]),
              refType: refMatch[2],
              index: Number(refMatch[3]),
            });
          }

          if (citations.length > 0) {
            segments.push({
              type: 'composite-citation',
              data: {
                hName: 'composite-citation',
                hProperties: {
                  citations,
                  citationId: citationId,
                },
              },
            });
          }

          // Counted even when nothing was emitted, as before.
          typeCounts.composite++;
          break;
        }

        case 'standalone': {
          segments.push({
            type: 'citation',
            data: {
              hName: 'citation',
              hProperties: {
                citation: {
                  turn: Number(match[1]),
                  refType: match[2],
                  index: Number(match[3]),
                },
                citationType: 'standalone',
                citationId: citationId,
              },
            },
          });

          typeCounts.standalone++;
          break;
        }
      }

      // Resume scanning right after the consumed marker.
      currentPosition = matchIndex + matchText.length;
    }

    const siblings = parentNode?.children;
    if (segments.length > 0 && index !== undefined && siblings) {
      siblings.splice(index, 1, ...segments);
      // Skip past the nodes just inserted.
      return index + segments.length;
    }

    // Nothing to splice: at least strip stray marker characters.
    const cleanedValue = originalValue.replace(CLEANUP_REGEX, '');
    if (cleanedValue !== originalValue) {
      textNode.value = cleanedValue;
    }
  });
}
|
|
/**
 * Plugin factory: returns a transformer that runs `processTree` on the tree
 * it is given. The transformer returns nothing; any changes are made by
 * `processTree`.
 */
export function unicodeCitation() {
  return function transformer(tree: Node): void {
    processTree(tree);
  };
}
|
|