1 package nl.dedicon.pipeline.braille.calabash.impl;
2
3 import com.xmlcalabash.core.XProcException;
4 import com.xmlcalabash.core.XProcRuntime;
5 import com.xmlcalabash.core.XProcStep;
6 import com.xmlcalabash.io.ReadablePipe;
7 import com.xmlcalabash.io.WritablePipe;
8 import com.xmlcalabash.library.DefaultStep;
9 import com.xmlcalabash.runtime.XAtomicStep;
10 import java.io.StringReader;
11 import java.time.LocalDate;
12 import java.time.format.DateTimeFormatter;
13 import java.util.ArrayList;
14 import java.util.Collection;
15 import java.util.Collections;
16 import java.util.List;
17 import java.util.Objects;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20 import javax.xml.transform.stream.StreamSource;
21 import static net.sf.saxon.s9api.Axis.CHILD;
22 import net.sf.saxon.s9api.DocumentBuilder;
23 import net.sf.saxon.s9api.QName;
24 import net.sf.saxon.s9api.SaxonApiException;
25 import net.sf.saxon.s9api.XdmItem;
26 import net.sf.saxon.s9api.XdmNode;
27 import net.sf.saxon.s9api.XdmSequenceIterator;
28 import nl.dedicon.pipeline.braille.model.Book;
29 import nl.dedicon.pipeline.braille.model.Page;
30 import nl.dedicon.pipeline.braille.model.Section;
31 import nl.dedicon.pipeline.braille.model.Volume;
32 import org.apache.commons.lang3.StringUtils;
33 import org.daisy.braille.api.embosser.FileFormat;
34 import org.daisy.common.xproc.calabash.XProcStepProvider;
35 import org.daisy.pipeline.braille.common.Provider.util.MemoizingProvider;
36 import static org.daisy.pipeline.braille.common.Provider.util.dispatch;
37 import static org.daisy.pipeline.braille.common.Provider.util.memoize;
38 import org.daisy.pipeline.braille.common.Query;
39 import static org.daisy.pipeline.braille.common.Query.util.mutableQuery;
40 import static org.daisy.pipeline.braille.common.Query.util.query;
41 import org.daisy.pipeline.braille.pef.FileFormatProvider;
42 import org.osgi.service.component.annotations.Component;
43 import org.osgi.service.component.annotations.Reference;
44 import org.osgi.service.component.annotations.ReferenceCardinality;
45 import org.osgi.service.component.annotations.ReferencePolicy;
46 import org.slf4j.Logger;
47 import org.slf4j.LoggerFactory;
48
49
50
51
52
53
54 public class MetadataStep extends DefaultStep {
55
56 private static final Logger logger = LoggerFactory.getLogger(MetadataStep.class);
57
58 private static final QName _xquery = new QName("xquery");
59 private static final QName _identifier = new QName("identifier");
60 private static final QName _brf_file_extension = new QName("brf-file-extension");
61 private static final QName _brf_file_format = new QName("brf-file-format");
62 private static final QName _brf_name_pattern = new QName("brf-name-pattern");
63 private static final QName _brf_number_width = new QName("brf-number-width");
64 private static final QName _optional_date = new QName("optional-date");
65
66 private static final String PEF_NAMESPACE = "http://www.daisy.org/ns/2008/pef";
67 private static final String BRAILLE_DIGITS = "⠚⠁⠃⠉⠙⠑⠋⠛⠓⠊";
68
69 private static final Pattern HEADER = Pattern.compile("[\u2800\\s]+(⠼[" + BRAILLE_DIGITS + "]+|)([\u2800\\s]?⠤[\u2800\\s]?⠼[" + BRAILLE_DIGITS + "]+|)[\u2800\\s]+(⠼[" + BRAILLE_DIGITS + "]+|)");
70 private static final DateTimeFormatter DAY_MONTH_YEAR = DateTimeFormatter.ofPattern("d-M-Y");
71
72 private final MemoizingProvider<Query,FileFormat> fileFormatProvider;
73
74 private ReadablePipe source = null;
75 private WritablePipe result = null;
76
77 private MetadataStep(XProcRuntime runtime, XAtomicStep step, MemoizingProvider<Query,FileFormat> fileFormatProvider) {
78 super(runtime, step);
79 this.fileFormatProvider = fileFormatProvider;
80 }
81
82 @Override
83 public void setInput(String port, ReadablePipe pipe) {
84 source = pipe;
85 }
86
87 @Override
88 public void setOutput(String port, WritablePipe pipe) {
89 result = pipe;
90 }
91
92 @Override
93 public void reset() {
94 source.resetReader();
95 result.resetWriter();
96 }
97
98 @Override
99 public void run() throws SaxonApiException {
100 super.run();
101
102 try {
103
104 XdmNode pef = source.read();
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119 String identifier = getOption(_identifier, "");
120 String brfFileFormat = getOption(_brf_file_format, "");
121 String brfNamePattern = getOption(_brf_name_pattern, "");
122 int brfNumberWidth = getOption(_brf_number_width, 0);
123 String optionalDate = getOption(_optional_date, "");
124
125 String brfFileExtension = getFileExtension(brfFileFormat);
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153 Book book = parsePEF(pef);
154 String metadataXml = createMetadataXml(book, identifier, brfNamePattern, brfNumberWidth, brfFileExtension, optionalDate);
155 DocumentBuilder documentBuilder = runtime.getProcessor().newDocumentBuilder();
156 XdmNode metadata = documentBuilder.build(new StreamSource(new StringReader(metadataXml)));
157
158 result.write(metadata);
159
160 } catch (Exception e) {
161
162 logger.error("dedicon:metadata failed", e);
163 throw new XProcException(step.getNode(), e);
164
165 }
166 }
167
168 private String getFileExtension (String fileFormatQuery) {
169 Query.MutableQuery q = mutableQuery(query(fileFormatQuery));
170 Iterable<FileFormat> fileFormats = fileFormatProvider.get(q);
171 String fileExtension = "";
172 for (FileFormat fileFormat : fileFormats) {
173 fileExtension = fileFormat.getFileExtension();
174 break;
175 }
176 return fileExtension;
177 }
178
179 private Book parsePEF(XdmNode pef) {
180 Book book = new Book();
181 getChildren(pef, PEF_NAMESPACE, "pef").forEach(pefRoot -> {
182 getChildren(pefRoot, PEF_NAMESPACE, "body").forEach(pefBody -> {
183 getChildren(pefBody, PEF_NAMESPACE, "volume").forEach(pefVolume -> {
184 Volume volume = new Volume();
185 book.getVolumes().add(volume);
186 String pefDuplex = pefVolume.getAttributeValue(new QName("duplex"));
187 volume.setDuplex("true".equalsIgnoreCase(pefDuplex));
188 getChildren(pefVolume, PEF_NAMESPACE, "section").forEach(pefSection -> {
189 Section section = new Section();
190 volume.getSections().add(section);
191 getChildren(pefSection, PEF_NAMESPACE, "page").forEach(pefPage -> {
192 Page page = new Page();
193 setPageNumbers(page, pefPage);
194 section.getPages().add(page);
195 });
196 });
197 });
198 });
199 });
200 determineVolumeMetadata(book);
201 return book;
202 }
203
204 private void setPageNumbers(Page page, XdmNode pefPage) {
205 for(XdmNode pefRow : getChildren(pefPage, PEF_NAMESPACE, "row")) {
206 String header = pefRow.getStringValue();
207 setPageNumbers(page, header);
208 break;
209 }
210 }
211
212
213
214 private List<XdmNode> getChildren(XdmNode parent, String namespace, String child) {
215 List<XdmNode> children = new ArrayList<>();
216 XdmSequenceIterator iterator = parent.axisIterator(CHILD, new QName(namespace, child));
217 while (iterator.hasNext()) {
218 XdmItem item = iterator.next();
219 if (item instanceof XdmNode) {
220 children.add((XdmNode)item);
221 }
222 }
223 return children;
224 }
225
226 private void setPageNumbers(Page page, String header) {
227 Matcher pageNumbersMatcher = HEADER.matcher(header);
228 if (pageNumbersMatcher.find()) {
229 String fromPrintPageNumber = afterNumberSign(pageNumbersMatcher.group(1));
230 String untilPrintPageNumber = afterNumberSign(pageNumbersMatcher.group(2));
231 String pageNumber = afterNumberSign(pageNumbersMatcher.group(3));
232
233
234 if (fromPrintPageNumber.length() > 0 && untilPrintPageNumber.length() == 0) {
235 untilPrintPageNumber = fromPrintPageNumber;
236 }
237 if (fromPrintPageNumber.length() == 0 && untilPrintPageNumber.length() > 0) {
238 fromPrintPageNumber = untilPrintPageNumber;
239 }
240
241 page.setFromPrintPageNumber(getNumber(fromPrintPageNumber));
242 page.setUntilPrintPageNumber(getNumber(untilPrintPageNumber));
243 page.setPageNumber(getNumber(pageNumber));
244 }
245
246
247 }
248
249 private String afterNumberSign(String brailleNumber) {
250 int index = brailleNumber.indexOf('⠼');
251 if (index >= 0) {
252 return brailleNumber.substring(index + 1);
253 }
254
255 return "";
256 }
257
258 private Integer getNumber(String brailleNumber) {
259 if (brailleNumber.length() == 0) {
260 return null;
261 }
262
263 int number = 0;
264 for (int i = 0; i < brailleNumber.length(); i++) {
265 char brailleDigit = brailleNumber.charAt(i);
266 int digit = BRAILLE_DIGITS.indexOf(brailleDigit);
267 number = number*10 + digit;
268 }
269 return number;
270 }
271
272 private void determineVolumeMetadata(Book book) {
273 Volume previousVolume = null;
274 int expectedFirstPageNumber = 1;
275 for (Volume volume : book.getVolumes()) {
276 volume.setFirstPrintPageNumber(getFirstPrintPageNumber(volume));
277 volume.setLastPrintPageNumber(getLastPrintPageNumber(volume));
278 expectedFirstPageNumber = setPageNumbers(volume, expectedFirstPageNumber);
279
280 if (previousVolume != null) {
281 previousVolume.setLastPageNumber(volume.getFirstPageNumber() - 1);
282 }
283 previousVolume = volume;
284 }
285 }
286
287 private Integer getFirstPrintPageNumber(Volume volume) {
288 return volume.getSections().stream()
289 .map(Section::getPages)
290 .flatMap(Collection::stream)
291 .map(Page::getFromPrintPageNumber)
292 .filter(Objects::nonNull)
293 .findFirst()
294 .orElse(null);
295 }
296
297
298
299
300
301
302
303 private Integer getLastPrintPageNumber(Volume volume) {
304 return volume.getSections().stream()
305 .map(Section::getPages)
306 .flatMap(Collection::stream)
307 .map(Page::getUntilPrintPageNumber)
308 .filter(Objects::nonNull)
309 .sorted(Collections.reverseOrder())
310 .findFirst()
311 .orElse(null);
312 }
313
314
315
316
317
318
319
320
321 private int setPageNumbers(Volume volume, int expectedFirstPageNumber) {
322 volume.setFirstPageNumber(expectedFirstPageNumber);
323 volume.setLastPageNumber(expectedFirstPageNumber);
324 int lastPageNumber = expectedFirstPageNumber;
325 boolean pageNumberFound = false;
326 for (Section section : volume.getSections()) {
327 int pagesInThisSection = 0;
328 for (Page page : section.getPages()) {
329 if (page.getPageNumber() != null) {
330 if (!pageNumberFound) {
331
332 volume.setFirstPageNumber(volume.getFirstPageNumber() + page.getPageNumber() - lastPageNumber);
333 pageNumberFound = true;
334 }
335 lastPageNumber = page.getPageNumber();
336 }
337 volume.setLastPageNumber(lastPageNumber);
338 lastPageNumber ++;
339 pagesInThisSection ++;
340 }
341
342
343 if (volume.getDuplex()) {
344 lastPageNumber += pagesInThisSection % 2;
345 }
346 }
347
348 return lastPageNumber;
349 }
350
351 private String createMetadataXml(Book book, String identifier, String brfNamePattern, int brfNumberWidth, String brfFileExtension, String optionalDate) throws SaxonApiException {
352 String date = optionalDate;
353 if (StringUtils.isBlank(date)) {
354 date = LocalDate.now().format(DAY_MONTH_YEAR);
355 }
356 StringBuilder xml = new StringBuilder();
357 xml.append("<lois_id>").append(identifier).append("</lois_id>");
358 int volumeIndex = 0;
359 for (Volume volume : book.getVolumes()) {
360 volumeIndex ++;
361 xml.append("<volume>");
362 xml.append("<filename>").append(getFilename(volumeIndex, brfNamePattern, brfNumberWidth, brfFileExtension)).append("</filename>");
363 xml.append("<vtype>br</vtype>");
364 xml.append("<volumenumber>").append(volumeIndex).append("</volumenumber>");
365 xml.append("<fromip>");
366 if (volume.getFirstPrintPageNumber() != null) {
367 xml.append(volume.getFirstPrintPageNumber());
368 }
369 xml.append("</fromip>");
370 xml.append("<tillip>");
371 if (volume.getLastPrintPageNumber() != null) {
372 xml.append(volume.getLastPrintPageNumber());
373 }
374 xml.append("</tillip>");
375 xml.append("<ippages>");
376 if (volume.getFirstPrintPageNumber() != null && volume.getLastPrintPageNumber() != null) {
377 xml.append(volume.getLastPrintPageNumber() - volume.getFirstPrintPageNumber() + 1);
378 }
379 xml.append("</ippages>");
380 xml.append("<fromcp>");
381 if (volume.getFirstPageNumber() != null) {
382 xml.append(volume.getFirstPageNumber());
383 }
384 xml.append("</fromcp>");
385 xml.append("<tillcp>");
386 if (volume.getLastPageNumber() != null) {
387 xml.append(volume.getLastPageNumber());
388 }
389 xml.append("</tillcp>");
390 xml.append("<amount>");
391 if (volume.getFirstPageNumber() != null && volume.getLastPageNumber() != null) {
392 xml.append(volume.getLastPageNumber() - volume.getFirstPageNumber() + 1);
393 }
394 xml.append("</amount>");
395 xml.append("<last>").append(volumeIndex == book.getVolumes().size() ? "Y" : "N").append("</last>");
396 xml.append("<vreadydate>").append(date).append("</vreadydate>");
397 xml.append("</volume>");
398 }
399 String document = "<document>".concat(xml.toString()).concat("</document>");
400 return document;
401 }
402
403 private String getFilename(int volumeIndex, String brfNamePattern, int brfNumberWidth, String brfFileExtension) {
404 String brfNumber = String.valueOf(volumeIndex);
405 while (brfNumber.length() < brfNumberWidth) {
406 brfNumber = "0" + brfNumber;
407 }
408 return brfNamePattern.replace("{}", brfNumber) + brfFileExtension;
409 }
410
411 @Component(
412 name = "dedicon:metadata",
413 service = {XProcStepProvider.class},
414 property = {"type:String={http://www.dedicon.nl}metadata"}
415 )
416 public static class Provider implements XProcStepProvider {
417
418 private List<FileFormatProvider> fileFormatProviders = new ArrayList<>();
419 private MemoizingProvider<Query,FileFormat> fileFormatProvider = memoize(dispatch(fileFormatProviders));
420
421 @Override
422 public XProcStep newStep(XProcRuntime runtime, XAtomicStep step) {
423 return new MetadataStep(runtime, step, fileFormatProvider);
424 }
425
426 @Reference(
427 name = "FileFormatProvider",
428 unbind = "unbindFileFormatProvider",
429 service = FileFormatProvider.class,
430 cardinality = ReferenceCardinality.MULTIPLE,
431 policy = ReferencePolicy.DYNAMIC
432 )
433 protected void bindFileFormatProvider(FileFormatProvider provider) {
434 fileFormatProviders.add(provider);
435 }
436
437 protected void unbindFileFormatProvider(FileFormatProvider provider) {
438 fileFormatProviders.remove(provider);
439 this.fileFormatProvider.invalidateCache();
440 }
441 }
442 }