1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 package com.rapiddweller.platform.xls;
28
29 import com.rapiddweller.benerator.engine.BeneratorContext;
30 import com.rapiddweller.common.ArrayUtil;
31 import com.rapiddweller.common.CollectionUtil;
32 import com.rapiddweller.common.ConfigurationError;
33 import com.rapiddweller.common.Converter;
34 import com.rapiddweller.common.IOUtil;
35 import com.rapiddweller.common.StringUtil;
36 import com.rapiddweller.common.converter.ConverterManager;
37 import com.rapiddweller.common.converter.ToStringConverter;
38 import com.rapiddweller.format.DataContainer;
39 import com.rapiddweller.format.DataIterator;
40 import com.rapiddweller.format.util.OrthogonalArrayIterator;
41 import com.rapiddweller.format.util.ThreadLocalDataContainer;
42 import com.rapiddweller.format.xls.XLSLineIterator;
43 import com.rapiddweller.model.data.ComplexTypeDescriptor;
44 import com.rapiddweller.model.data.ComponentDescriptor;
45 import com.rapiddweller.model.data.DataModel;
46 import com.rapiddweller.model.data.Entity;
47 import com.rapiddweller.platform.array.Array2EntityConverter;
48 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
49 import org.apache.poi.ss.usermodel.Sheet;
50 import org.apache.poi.ss.usermodel.Workbook;
51 import org.apache.poi.ss.usermodel.WorkbookFactory;
52
53 import java.io.IOException;
54 import java.util.ArrayList;
55 import java.util.List;
56
57
58
59
60
61
62
63
64 public class SingleSheetXLSEntityIterator implements DataIterator<Entity> {
65
66 private String uri;
67 private final Workbook workbook;
68 private final boolean rowBased;
69 private final boolean formatted;
70 private final String emptyMarker;
71 private DataIterator<Object[]> source;
72 private final Converter<String, ?> preprocessor;
73 private Array2EntityConverter converter;
74 private Object[] buffer;
75 private final ThreadLocalDataContainer<Object[]> sourceContainer =
76 new ThreadLocalDataContainer<>();
77 private ComplexTypeDescriptor entityDescriptor;
78 private final BeneratorContext context;
79 private String[] headers;
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98 public SingleSheetXLSEntityIterator(String uri, String sheetName,
99 Converter<String, ?> preprocessor,
100 ComplexTypeDescriptor entityType,
101 BeneratorContext context,
102 boolean rowBased, boolean formatted,
103 String emptyMarker)
104 throws InvalidFormatException, IOException {
105 this(loadSheet(uri, sheetName), preprocessor, entityType, context,
106 rowBased, formatted, emptyMarker);
107 this.uri = uri;
108 }
109
110
111
112
113
114
115
116
117
118
119
120
121 public SingleSheetXLSEntityIterator(Sheet sheet,
122 Converter<String, ?> preprocessor,
123 ComplexTypeDescriptor entityDescriptor,
124 BeneratorContext context,
125 boolean rowBased, boolean formatted,
126 String emptyMarker) {
127 this.workbook = sheet.getWorkbook();
128 this.preprocessor = preprocessor;
129 this.context = context;
130 this.rowBased = rowBased;
131 this.formatted = formatted;
132 this.emptyMarker = emptyMarker;
133 this.source = createRawIterator(sheet, rowBased, preprocessor);
134
135
136 this.entityDescriptor = entityDescriptor;
137 if (this.entityDescriptor == null) {
138 String entityTypeName = sheet.getSheetName();
139 if (context != null) {
140 DataModel dataModel = context.getDataModel();
141 this.entityDescriptor = (ComplexTypeDescriptor) dataModel
142 .getTypeDescriptor(entityTypeName);
143 if (this.entityDescriptor != null) {
144 this.entityDescriptor =
145 new ComplexTypeDescriptor(entityTypeName + "_",
146 context.getLocalDescriptorProvider());
147 } else {
148 this.entityDescriptor = createDescriptor(entityTypeName);
149 }
150 } else {
151 this.entityDescriptor = createDescriptor(entityTypeName);
152 }
153 }
154
155
156 parseHeaders();
157 if (headers == null) {
158 this.source = null;
159 return;
160 }
161
162
163 DataContainer<Object[]> tmp = this.source.next(sourceContainer.get());
164 if (tmp == null) {
165 this.source = null;
166 return;
167 }
168 this.buffer = tmp.getData();
169 converter = new Array2EntityConverter(this.entityDescriptor, headers,
170 false);
171 }
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191 public static List<Entity> parseAll(String uri, String sheetName,
192 Converter<String, ?> preprocessor,
193 ComplexTypeDescriptor type,
194 BeneratorContext context,
195 boolean rowBased, boolean formatted,
196 String emptyMarker)
197 throws IOException, InvalidFormatException {
198 Sheet sheet = loadSheet(uri, sheetName);
199 return parseAll(sheet, preprocessor, type, context, rowBased, formatted,
200 emptyMarker);
201 }
202
203
204
205
206
207
208
209
210
211
212
213
214
215 public static List<Entity> parseAll(Sheet sheet,
216 Converter<String, ?> preprocessor,
217 ComplexTypeDescriptor type,
218 BeneratorContext context,
219 boolean rowBased, boolean formatted,
220 String emptyMarker) {
221 List<Entity> list = new ArrayList<>();
222 SingleSheetXLSEntityIterator iterator =
223 new SingleSheetXLSEntityIterator(sheet, preprocessor, type,
224 context, rowBased, formatted, emptyMarker);
225 DataContainer<Entity> container = new DataContainer<>();
226 while ((container = iterator.next(container)) != null) {
227 list.add(container.getData());
228 }
229 return list;
230 }
231
232 private static Sheet loadSheet(String uri, String sheetName)
233 throws IOException {
234 Workbook workbook =
235 WorkbookFactory.create(IOUtil.getInputStreamForURI(uri));
236 Sheet sheet = workbook.getSheet(sheetName);
237 if (sheet == null) {
238 throw new ConfigurationError(
239 "Sheet '" + sheetName + "' not found in file " + uri);
240 }
241 return sheet;
242 }
243
244
245
246
247 private static String[] normalizeHeaders(Object[] rawHeaders) {
248 String[] headers = (String[]) ConverterManager
249 .convertAll(rawHeaders, new ToStringConverter(), String.class);
250 StringUtil.trimAll(headers);
251 int lastNonEmptyIndex = headers.length - 1;
252 while (lastNonEmptyIndex >= 0 &&
253 StringUtil.isEmpty(headers[lastNonEmptyIndex])) {
254 lastNonEmptyIndex--;
255 }
256 if (lastNonEmptyIndex < headers.length - 1) {
257 headers = ArrayUtil.copyOfRange(headers, 0, lastNonEmptyIndex + 1);
258 }
259 return headers;
260 }
261
262 @Override
263 public Class<Entity> getType() {
264 return Entity.class;
265 }
266
267
268
269
270 @Override
271 public DataContainer<Entity> next(DataContainer<Entity> container) {
272 if (source == null) {
273 return null;
274 }
275 Object[] rawData;
276 if (buffer != null) {
277 rawData = buffer;
278 buffer = null;
279 } else {
280 DataContainer<Object[]> tmp = source.next(sourceContainer.get());
281 if (tmp == null) {
282 return null;
283 }
284 rawData = tmp.getData();
285 }
286 resolveCollections(rawData);
287 Entity result = converter.convert(rawData);
288 return container.setData(result);
289 }
290
291 @Override
292 public void close() {
293 IOUtil.close(source);
294 }
295
296 private void parseHeaders() {
297 DataContainer<Object[]> tmp = this.source.next(sourceContainer.get());
298 this.headers = (tmp != null ? normalizeHeaders(tmp.getData()) : null);
299 }
300
301 private DataIterator<Object[]> createRawIterator(Sheet sheet,
302 boolean rowBased,
303 Converter<String, ?> preprocessor) {
304 XLSLineIterator iterator =
305 new XLSLineIterator(sheet, false, formatted, preprocessor);
306 if (emptyMarker != null) {
307 iterator.setEmptyMarker(emptyMarker);
308 }
309 if (!rowBased) {
310 return new OrthogonalArrayIterator<>(iterator);
311 }
312 return iterator;
313 }
314
315 private ComplexTypeDescriptor createDescriptor(String entityTypeName) {
316 ComplexTypeDescriptor descriptor;
317 descriptor = new ComplexTypeDescriptor(entityTypeName,
318 context.getLocalDescriptorProvider());
319 context.addLocalType(descriptor);
320 return descriptor;
321 }
322
323 private void resolveCollections(Object[] rawData) {
324 String colRefPrefix = PlatformDescriptor.getCollectionReferencePrefix();
325 for (int i = 0; i < rawData.length; i++) {
326 Object cellValue = rawData[i];
327 if (cellValue instanceof String &&
328 ((String) cellValue).startsWith(colRefPrefix)) {
329 String tabName =
330 ((String) cellValue).substring(colRefPrefix.length());
331 ComponentDescriptor component =
332 entityDescriptor.getComponent(headers[i]);
333 ComplexTypeDescriptor componentType = (component != null ?
334 (ComplexTypeDescriptor) component.getTypeDescriptor() :
335 null);
336 rawData[i] = mapTabToArray(tabName, componentType);
337 }
338 }
339 }
340
341 private Entity[] mapTabToArray(String tabName, ComplexTypeDescriptor type) {
342 Sheet sheet = getSheet(tabName);
343 List<Entity> elements =
344 parseAll(sheet, preprocessor, type, context, rowBased,
345 formatted, emptyMarker);
346 return CollectionUtil.toArray(elements, Entity.class);
347 }
348
349 private Sheet getSheet(String tabName) {
350 for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
351 Sheet candidate = workbook.getSheetAt(i);
352 if (candidate.getSheetName().trim()
353 .equalsIgnoreCase(tabName.trim())) {
354 return candidate;
355 }
356 }
357
358 throw new ConfigurationError("Tab '" + tabName + "' not found" +
359 (uri != null ? " in " + uri : ""));
360 }
361
362
363
364
365 @Override
366 public String toString() {
367 return getClass().getSimpleName() + "[" + source + "]";
368 }
369
370 }