diff --git a/.github/workflows/spec-review.yml b/.github/workflows/spec-review.yml index fc7b3cdd32..8e0b8fc7ac 100644 --- a/.github/workflows/spec-review.yml +++ b/.github/workflows/spec-review.yml @@ -3,6 +3,8 @@ name: OOXML Spec Compliance Review on: pull_request: paths: + - 'packages/super-editor/src/core/super-converter/v2/importer/**' + - 'packages/super-editor/src/core/super-converter/v2/exporter/**' - 'packages/super-editor/src/core/super-converter/v3/handlers/**' concurrency: @@ -31,7 +33,7 @@ jobs: - name: Get changed files id: changed run: | - FILES=$(gh pr diff ${{ github.event.pull_request.number }} --name-only | grep 'v3/handlers/' || true) + FILES=$(gh pr diff ${{ github.event.pull_request.number }} --name-only | grep -E 'v2/(importer|exporter)/|v3/handlers/' || true) echo "files<> $GITHUB_OUTPUT echo "$FILES" >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT diff --git a/packages/super-editor/src/core/super-converter/v2/importer/docxImporter.js b/packages/super-editor/src/core/super-converter/v2/importer/docxImporter.js index ae34942c3c..05684e1747 100644 --- a/packages/super-editor/src/core/super-converter/v2/importer/docxImporter.js +++ b/packages/super-editor/src/core/super-converter/v2/importer/docxImporter.js @@ -167,6 +167,7 @@ export const createDocumentJson = (docx, converter, editor) => { // Safety: drop any inline-only nodes that accidentally landed at the doc root parsedContent = filterOutRootInlineNodes(parsedContent); + parsedContent = normalizeTableBookmarksInContent(parsedContent, editor); collapseWhitespaceNextToInlinePassthrough(parsedContent); const result = { @@ -841,6 +842,210 @@ export function filterOutRootInlineNodes(content = []) { return result; } +/** + * Normalize bookmark nodes that appear as direct table children. + * Moves bookmarkStart/End into the first/last cell textblock of the table. + * + * Some non-conformant DOCX producers place bookmarks as direct table children. + * Per ECMA-376 ยง17.13.6.2, they should be inside cells (bookmarkStart) or + * as children of rows (bookmarkEnd). + * PM can't accept bookmarks as a direct child of table row and that is why + * we relocate them for compatibility. + * + * @param {Array<{type: string, content?: any[], attrs?: any}>} content + * @param {Editor} [editor] + * @returns {Array} + */ +export function normalizeTableBookmarksInContent(content = [], editor) { + if (!Array.isArray(content) || content.length === 0) return content; + + return content.map((node) => normalizeTableBookmarksInNode(node, editor)); +} + +function normalizeTableBookmarksInNode(node, editor) { + if (!node || typeof node !== 'object') return node; + + if (node.type === 'table') { + node = normalizeTableBookmarksInTable(node, editor); + } + + if (Array.isArray(node.content)) { + node.content = normalizeTableBookmarksInContent(node.content, editor); + } + + return node; +} + +function parseColIndex(val) { + if (val == null || val === '') return null; + const n = parseInt(String(val), 10); + return Number.isNaN(n) ? null : Math.max(0, n); +} + +/** colFirst/colLast apply only to bookmarkStart; bookmarkEnd always uses first/last cell by position. */ +function getCellIndexForBookmark(bookmarkNode, position, rowCellCount) { + if (!rowCellCount) return 0; + if (bookmarkNode?.type === 'bookmarkEnd') { + return position === 'start' ? 0 : rowCellCount - 1; + } + const attrs = bookmarkNode?.attrs ?? {}; + const col = parseColIndex(position === 'start' ? attrs.colFirst : attrs.colLast); + if (col == null) return position === 'start' ? 0 : rowCellCount - 1; + return Math.min(col, rowCellCount - 1); +} + +function addBookmarkToRowCellInlines(rowCellInlines, rowIndex, position, bookmarkNode, rowCellCount) { + const cellIndex = getCellIndexForBookmark(bookmarkNode, position, rowCellCount); + const bucket = rowCellInlines[rowIndex][position]; + if (!bucket[cellIndex]) bucket[cellIndex] = []; + bucket[cellIndex].push(bookmarkNode); +} + +function normalizeTableBookmarksInTable(tableNode, editor) { + if (!tableNode || tableNode.type !== 'table' || !Array.isArray(tableNode.content)) return tableNode; + + const rows = tableNode.content.filter((child) => child?.type === 'tableRow'); + if (!rows.length) return tableNode; + + /** @type {{ start: Record, end: Record }[]} */ + const rowCellInlines = rows.map(() => ({ + start: /** @type {Record} */ ({}), + end: /** @type {Record} */ ({}), + })); + const updatedRows = rows.slice(); + let rowCursor = 0; + + const normalizedContent = tableNode.content.reduce((acc, child) => { + if (child?.type === 'tableRow') { + acc.push(updatedRows[rowCursor] ?? child); + rowCursor += 1; + return acc; + } + + if (isBookmarkNode(child)) { + const prevRowIndex = rowCursor > 0 ? rowCursor - 1 : null; + const nextRowIndex = rowCursor < rows.length ? rowCursor : null; + const row = (nextRowIndex ?? prevRowIndex) != null ? rows[nextRowIndex ?? prevRowIndex] : null; + const rowCellCount = row?.content?.length ?? 0; + + if (child.type === 'bookmarkStart') { + if (nextRowIndex != null) + addBookmarkToRowCellInlines(rowCellInlines, nextRowIndex, 'start', child, rowCellCount); + else if (prevRowIndex != null) + addBookmarkToRowCellInlines(rowCellInlines, prevRowIndex, 'end', child, rowCellCount); + } else { + if (prevRowIndex != null) addBookmarkToRowCellInlines(rowCellInlines, prevRowIndex, 'end', child, rowCellCount); + else if (nextRowIndex != null) + addBookmarkToRowCellInlines(rowCellInlines, nextRowIndex, 'start', child, rowCellCount); + } + return acc; + } + + acc.push(child); + return acc; + }, []); + + updatedRows.forEach((row, index) => { + const { start: startByCell, end: endByCell } = rowCellInlines[index]; + const cellIndices = [ + ...new Set([...Object.keys(startByCell).map(Number), ...Object.keys(endByCell).map(Number)]), + ].sort((a, b) => a - b); + for (const cellIndex of cellIndices) { + const startNodes = startByCell[cellIndex]; + const endNodes = endByCell[cellIndex]; + if (startNodes?.length) + updatedRows[index] = insertInlineIntoRow(updatedRows[index], startNodes, editor, 'start', cellIndex); + if (endNodes?.length) + updatedRows[index] = insertInlineIntoRow(updatedRows[index], endNodes, editor, 'end', cellIndex); + } + }); + + // Rebuild content with updated rows (preserve any non-row nodes in place). + let updatedRowIndex = 0; + const rebuiltContent = normalizedContent.map((child) => { + if (child?.type === 'tableRow') { + const replacement = updatedRows[updatedRowIndex] ?? child; + updatedRowIndex += 1; + return replacement; + } + return child; + }); + + return { + ...tableNode, + content: rebuiltContent, + }; +} + +/** + * @param {number} [cellIndex] - If set, insert into this cell; otherwise first (start) or last (end) cell. + */ +function insertInlineIntoRow(rowNode, inlineNodes, editor, position, cellIndex) { + if (!rowNode || !inlineNodes?.length) return rowNode; + + if (!Array.isArray(rowNode.content) || rowNode.content.length === 0) { + const paragraph = { type: 'paragraph', content: inlineNodes }; + const newCell = { type: 'tableCell', content: [paragraph], attrs: {}, marks: [] }; + return { ...rowNode, content: [newCell] }; + } + + const lastCellIndex = rowNode.content.length - 1; + const targetIndex = + cellIndex != null ? Math.min(Math.max(0, cellIndex), lastCellIndex) : position === 'end' ? lastCellIndex : 0; + const targetCell = rowNode.content[targetIndex]; + const updatedCell = insertInlineIntoCell(targetCell, inlineNodes, editor, position); + + if (updatedCell === targetCell) return rowNode; + + const nextContent = rowNode.content.slice(); + nextContent[targetIndex] = updatedCell; + return { ...rowNode, content: nextContent }; +} + +function findTextblockIndex(content, editor, fromEnd) { + const start = fromEnd ? content.length - 1 : 0; + const end = fromEnd ? -1 : content.length; + const step = fromEnd ? -1 : 1; + for (let i = start; fromEnd ? i > end : i < end; i += step) { + if (isTextblockNode(content[i], editor)) return i; + } + return -1; +} + +function insertInlineIntoCell(cellNode, inlineNodes, editor, position) { + if (!cellNode || !inlineNodes?.length) return cellNode; + + const content = Array.isArray(cellNode.content) ? cellNode.content.slice() : []; + const targetIndex = findTextblockIndex(content, editor, position === 'end'); + + if (targetIndex === -1) { + const paragraph = { type: 'paragraph', content: inlineNodes }; + if (position === 'end') content.push(paragraph); + else content.unshift(paragraph); + return { ...cellNode, content }; + } + + const targetBlock = content[targetIndex] || { type: 'paragraph', content: [] }; + const blockContent = Array.isArray(targetBlock.content) ? targetBlock.content.slice() : []; + const nextBlockContent = position === 'end' ? blockContent.concat(inlineNodes) : inlineNodes.concat(blockContent); + + content[targetIndex] = { ...targetBlock, content: nextBlockContent }; + return { ...cellNode, content }; +} + +function isBookmarkNode(node) { + const typeName = node?.type; + return typeName === 'bookmarkStart' || typeName === 'bookmarkEnd'; +} + +function isTextblockNode(node, editor) { + const typeName = node?.type; + if (!typeName) return false; + const nodeType = editor?.schema?.nodes?.[typeName]; + if (nodeType && typeof nodeType.isTextblock === 'boolean') return nodeType.isTextblock; + return typeName === 'paragraph'; +} + /** * Reconstruct original OOXML for preservable inline nodes using their attribute decoders. * diff --git a/packages/super-editor/src/core/super-converter/v2/importer/docxImporter.test.js b/packages/super-editor/src/core/super-converter/v2/importer/docxImporter.test.js index 0f7327b5a8..61029204cc 100644 --- a/packages/super-editor/src/core/super-converter/v2/importer/docxImporter.test.js +++ b/packages/super-editor/src/core/super-converter/v2/importer/docxImporter.test.js @@ -1,5 +1,9 @@ import { describe, it, expect } from 'vitest'; -import { collapseWhitespaceNextToInlinePassthrough, filterOutRootInlineNodes } from './docxImporter.js'; +import { + collapseWhitespaceNextToInlinePassthrough, + filterOutRootInlineNodes, + normalizeTableBookmarksInContent, +} from './docxImporter.js'; const n = (type, attrs = {}) => ({ type, attrs, marks: [] }); @@ -178,3 +182,114 @@ describe('collapseWhitespaceNextToInlinePassthrough', () => { expect(tree[0].content[2].content[0].text).toBe('bar'); }); }); + +describe('normalizeTableBookmarksInContent', () => { + const table = (content) => ({ type: 'table', content, attrs: {}, marks: [] }); + const row = (cells) => ({ type: 'tableRow', content: cells, attrs: {}, marks: [] }); + const cell = (content) => ({ type: 'tableCell', content, attrs: {}, marks: [] }); + const paragraph = (content) => ({ type: 'paragraph', content, attrs: {}, marks: [] }); + const text = (value) => ({ type: 'text', text: value, marks: [] }); + const bookmarkStart = (id, attrs = {}) => ({ type: 'bookmarkStart', attrs: { id, ...attrs } }); + const bookmarkEnd = (id, attrs = {}) => ({ type: 'bookmarkEnd', attrs: { id, ...attrs } }); + + it('moves leading bookmarkStart into the first cell paragraph', () => { + const input = [table([bookmarkStart('b1'), row([cell([paragraph([text('Cell')])])])])]; + + const result = normalizeTableBookmarksInContent(input); + const normalizedTable = result[0]; + + expect(normalizedTable.content.some((node) => node.type === 'bookmarkStart')).toBe(false); + const paraContent = normalizedTable.content[0].content[0].content[0].content; + expect(paraContent[0]).toMatchObject({ type: 'bookmarkStart', attrs: { id: 'b1' } }); + expect(paraContent[1]).toMatchObject({ type: 'text', text: 'Cell' }); + }); + + it('moves trailing bookmarkEnd into the last cell paragraph', () => { + const input = [table([row([cell([paragraph([text('Cell')])])]), bookmarkEnd('b1')])]; + + const result = normalizeTableBookmarksInContent(input); + const normalizedTable = result[0]; + + expect(normalizedTable.content.some((node) => node.type === 'bookmarkEnd')).toBe(false); + const paraContent = normalizedTable.content[0].content[0].content[0].content; + expect(paraContent[0]).toMatchObject({ type: 'text', text: 'Cell' }); + expect(paraContent[1]).toMatchObject({ type: 'bookmarkEnd', attrs: { id: 'b1' } }); + }); + + it('moves bookmarkStart and bookmarkEnd into the same cell when no textblocks exist', () => { + const input = [table([bookmarkStart('b1'), row([cell([])]), bookmarkEnd('b1')])]; + + const result = normalizeTableBookmarksInContent(input); + const normalizedTable = result[0]; + + expect(normalizedTable.content.some((node) => node.type === 'bookmarkStart')).toBe(false); + expect(normalizedTable.content.some((node) => node.type === 'bookmarkEnd')).toBe(false); + + const paraContent = normalizedTable.content[0].content[0].content[0].content; + expect(paraContent).toEqual([ + { type: 'bookmarkStart', attrs: { id: 'b1' } }, + { type: 'bookmarkEnd', attrs: { id: 'b1' } }, + ]); + }); + + it('anchors bookmark boundaries to adjacent rows when markers appear between rows', () => { + const input = [ + table([ + bookmarkStart('b1'), + row([cell([paragraph([text('R1')])])]), + bookmarkEnd('b1'), + row([cell([paragraph([text('R2')])])]), + ]), + ]; + + const result = normalizeTableBookmarksInContent(input); + const normalizedTable = result[0]; + + const row1Content = normalizedTable.content[0].content[0].content[0].content; + expect(row1Content).toEqual([ + { type: 'bookmarkStart', attrs: { id: 'b1' } }, + { type: 'text', text: 'R1', marks: [] }, + { type: 'bookmarkEnd', attrs: { id: 'b1' } }, + ]); + + const row2Content = normalizedTable.content[1].content[0].content[0].content; + expect(row2Content).toEqual([{ type: 'text', text: 'R2', marks: [] }]); + }); + + it('creates a cell when a row is empty', () => { + const input = [table([bookmarkStart('b1'), row([]), bookmarkEnd('b1')])]; + + const result = normalizeTableBookmarksInContent(input); + const normalizedTable = result[0]; + + const rowContent = normalizedTable.content[0].content; + expect(rowContent).toHaveLength(1); + expect(rowContent[0].type).toBe('tableCell'); + + const paraContent = rowContent[0].content[0].content; + expect(paraContent).toEqual([ + { type: 'bookmarkStart', attrs: { id: 'b1' } }, + { type: 'bookmarkEnd', attrs: { id: 'b1' } }, + ]); + }); + + it('places bookmarkStart in the cell indicated by colFirst when present; bookmarkEnd uses first/last cell only', () => { + const twoCells = row([cell([paragraph([text('A')])]), cell([paragraph([text('B')])])]); + const input = [table([bookmarkStart('b1', { colFirst: '1' }), twoCells, bookmarkEnd('b1')])]; + + const result = normalizeTableBookmarksInContent(input); + const normalizedTable = result[0]; + const rowContent = normalizedTable.content[0].content; + + expect(normalizedTable.content.some((node) => node.type === 'bookmarkStart')).toBe(false); + expect(normalizedTable.content.some((node) => node.type === 'bookmarkEnd')).toBe(false); + + const firstCellContent = rowContent[0].content[0].content; + expect(firstCellContent).toEqual([{ type: 'text', text: 'A', marks: [] }]); + + const secondCellContent = rowContent[1].content[0].content; + expect(secondCellContent[0]).toMatchObject({ type: 'bookmarkStart', attrs: { id: 'b1', colFirst: '1' } }); + expect(secondCellContent[1]).toMatchObject({ type: 'text', text: 'B', marks: [] }); + expect(secondCellContent[2]).toMatchObject({ type: 'bookmarkEnd', attrs: { id: 'b1' } }); + }); +});