Skip to content

Commit ad31626

Browse files
committed
perf: improve OdfTableRow cell count performance
* Issue was mentioned in #25 * Do not iterate over all cells which is slow for Excel generated spreadsheets which have 16384 columns by default. * Rely on `number-columns-repeated` attribute to count the _real_ cells. * ATTN: this gives a different result for merged cells over multiple rows compared to the previous implementation. However, this count seems more in line with the actual content XML.
1 parent c0306d2 commit ad31626

File tree

4 files changed

+103
-9
lines changed

4 files changed

+103
-9
lines changed

odfdom/src/main/java/org/odftoolkit/odfdom/doc/table/OdfTableRow.java

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -287,16 +287,18 @@ public OdfTableCell getCellByIndex(int index) {
287287
* @return the cell count
288288
*/
289289
public int getCellCount() {
290-
OdfTable table = getTable();
291-
Set<OdfTableCell> realCells = new HashSet<>();
292-
List<CellCoverInfo> coverList =
293-
table.getCellCoverInfos(0, 0, table.getColumnCount() - 1, table.getRowCount() - 1);
294-
int rowIndex = getRowIndex();
295-
for (int i = 0; i < table.getColumnCount(); i++) {
296-
OdfTableCell cell = table.getOwnerCellByPosition(coverList, i, rowIndex);
297-
realCells.add(cell);
290+
// count cells by skipping covered-table-cell and taking into account number-columns-repeated attribute
291+
int cellCount = 0;
292+
for (Node node : new DomNodeList(maRowElement.getChildNodes())) {
293+
if (node instanceof TableTableCellElement tableCell) {
294+
if (tableCell.getTableNumberColumnsRepeatedAttribute() == null) {
295+
cellCount++;
296+
} else {
297+
cellCount += tableCell.getTableNumberColumnsRepeatedAttribute();
298+
}
299+
}
298300
}
299-
return realCells.size();
301+
return cellCount;
300302
}
301303

302304
/**
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
package org.odftoolkit.odfdom.doc.table;
2+
3+
import static org.junit.Assert.assertEquals;
4+
import static org.odftoolkit.odfdom.utils.ResourceUtilities.getAbsoluteInputPath;
5+
6+
import junit.framework.AssertionFailedError;
7+
import org.junit.Test;
8+
import org.odftoolkit.odfdom.doc.OdfSpreadsheetDocument;
9+
10+
public class TableCellCountTest {
11+
12+
// The number of columns that Excel always uses.
13+
// For example Excel puts <table:table-cell table:number-columns-repeated="16384"/> if no cell values or covered cells
14+
private static final int EXCEL_COLUMN_COUNT = 16384;
15+
16+
@Test
17+
public void verifyCellCountForLibreOfficeGeneratedSpreadsheet() {
18+
// Spreadsheet created by LibreOffice on Mac version 25.8.4.2 (AARCH64)
19+
try (OdfSpreadsheetDocument spreadsheet = loadSpreadsheetDocument("TestLibreOfficeSpreadsheetTableCellCount.ods")) {
20+
OdfTable sheet = spreadsheet.getSpreadsheetTables().get(0);
21+
assertEquals(3, sheet.getColumnCount());
22+
23+
// 3 cells merged so 2 covered cells
24+
assertEquals(3 - 2 /* covered cells */, sheet.getRowByIndex(0).getCellCount());
25+
26+
// 2 cells merged so 1 covered cell
27+
assertEquals(3 - 1 /* covered cell */, sheet.getRowByIndex(1).getCellCount());
28+
29+
// no merged cells
30+
assertEquals(3, sheet.getRowByIndex(2).getCellCount());
31+
32+
// 2 cells merged over 2 rows (simplified XML):
33+
// <table:table-row>
34+
// <table:table-cell table:number-columns-spanned="2" table:number-rows-spanned="2">
35+
// <text:p>1</text:p>
36+
// </table:table-cell>
37+
// <table:covered-table-cell/>
38+
// <table:table-cell>
39+
// <text:p>2</text:p>
40+
// </table:table-cell>
41+
// </table:table-row>
42+
// <table:table-row>
43+
// <table:covered-table-cell table:number-columns-repeated="2"/>
44+
// <table:table-cell>
45+
// <text:p>2</text:p>
46+
// </table:table-cell>
47+
// </table:table-row>
48+
// so 1 covered cell in first row
49+
assertEquals(3 - 1 /* covered cell */, sheet.getRowByIndex(3).getCellCount());
50+
// ... and 2 covered cells in second row
51+
assertEquals(3 - 2 /* covered cells */, sheet.getRowByIndex(4).getCellCount());
52+
}
53+
}
54+
55+
@Test
56+
public void verifyCellCountForExcelGeneratedSpreadsheet() {
57+
// Spreadsheet created by Microsoft® Excel for Mac Version 16.106 (26020821)
58+
try (OdfSpreadsheetDocument spreadsheet = loadSpreadsheetDocument("TestExcelSpreadsheetTableCellCount.ods")) {
59+
OdfTable sheet = spreadsheet.getSpreadsheetTables().get(0);
60+
assertEquals(EXCEL_COLUMN_COUNT, sheet.getColumnCount());
61+
62+
// 3 cells merged so 2 covered cells
63+
assertEquals(EXCEL_COLUMN_COUNT - 2 /* covered cells */, sheet.getRowByIndex(0).getCellCount());
64+
65+
// 2 cells merged so 1 covered cell
66+
assertEquals(EXCEL_COLUMN_COUNT - 1 /* covered cell */, sheet.getRowByIndex(1).getCellCount());
67+
68+
// no merged cells
69+
assertEquals(EXCEL_COLUMN_COUNT, sheet.getRowByIndex(2).getCellCount());
70+
71+
// 2 cells merged over 2 rows so 1 covered cell in first row
72+
assertEquals(EXCEL_COLUMN_COUNT - 1 /* covered cell */, sheet.getRowByIndex(3).getCellCount());
73+
// ... and 2 covered cells in second row
74+
assertEquals(EXCEL_COLUMN_COUNT - 2 /* covered cells */, sheet.getRowByIndex(4).getCellCount());
75+
76+
// Excel always adds
77+
// <table:table-row table:number-rows-repeated="1048573" table:style-name="ro1">
78+
// <table:table-cell table:number-columns-repeated="16384"/>
79+
// </table:table-row>
80+
assertEquals(EXCEL_COLUMN_COUNT, sheet.getRowByIndex(5).getCellCount());
81+
}
82+
}
83+
84+
private OdfSpreadsheetDocument loadSpreadsheetDocument(String filename) {
85+
try {
86+
return OdfSpreadsheetDocument.loadDocument(
87+
getAbsoluteInputPath(filename));
88+
} catch (Exception ex) {
89+
throw new AssertionFailedError(ex.getMessage());
90+
}
91+
}
92+
}
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)