1 package nl.dedicon.pipeline.braille.calabash.impl;
2
3 import com.xmlcalabash.core.XProcException;
4 import com.xmlcalabash.core.XProcRuntime;
5 import com.xmlcalabash.core.XProcStep;
6 import com.xmlcalabash.io.ReadablePipe;
7 import com.xmlcalabash.io.WritablePipe;
8 import com.xmlcalabash.library.DefaultStep;
9 import com.xmlcalabash.runtime.XAtomicStep;
10 import java.io.StringReader;
11 import java.time.LocalDate;
12 import java.time.format.DateTimeFormatter;
13 import java.util.ArrayList;
14 import java.util.Collection;
15 import java.util.Collections;
16 import java.util.List;
17 import java.util.Objects;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20 import javax.xml.transform.stream.StreamSource;
21 import static net.sf.saxon.s9api.Axis.CHILD;
22 import net.sf.saxon.s9api.DocumentBuilder;
23 import net.sf.saxon.s9api.QName;
24 import net.sf.saxon.s9api.SaxonApiException;
25 import net.sf.saxon.s9api.XdmItem;
26 import net.sf.saxon.s9api.XdmNode;
27 import net.sf.saxon.s9api.XdmSequenceIterator;
28 import nl.dedicon.pipeline.braille.model.Book;
29 import nl.dedicon.pipeline.braille.model.Page;
30 import nl.dedicon.pipeline.braille.model.Section;
31 import nl.dedicon.pipeline.braille.model.Volume;
32 import org.daisy.braille.api.embosser.FileFormat;
33 import org.daisy.common.xproc.calabash.XProcStepProvider;
34 import org.daisy.pipeline.braille.common.Provider.util.MemoizingProvider;
35 import static org.daisy.pipeline.braille.common.Provider.util.dispatch;
36 import static org.daisy.pipeline.braille.common.Provider.util.memoize;
37 import org.daisy.pipeline.braille.common.Query;
38 import static org.daisy.pipeline.braille.common.Query.util.mutableQuery;
39 import static org.daisy.pipeline.braille.common.Query.util.query;
40 import org.daisy.pipeline.braille.pef.FileFormatProvider;
41 import org.osgi.service.component.annotations.Component;
42 import org.osgi.service.component.annotations.Reference;
43 import org.osgi.service.component.annotations.ReferenceCardinality;
44 import org.osgi.service.component.annotations.ReferencePolicy;
45 import org.slf4j.Logger;
46 import org.slf4j.LoggerFactory;
47
48
49
50
51
52
53 public class MetadataStep extends DefaultStep {
54
55 private static final Logger logger = LoggerFactory.getLogger(MetadataStep.class);
56
57 private static final QName _xquery = new QName("xquery");
58 private static final QName _identifier = new QName("identifier");
59 private static final QName _brf_file_extension = new QName("brf-file-extension");
60 private static final QName _brf_file_format = new QName("brf-file-format");
61 private static final QName _brf_name_pattern = new QName("brf-name-pattern");
62 private static final QName _brf_number_width = new QName("brf-number-width");
63 private static final QName _optional_date = new QName("optional-date");
64
65 private static final String PEF_NAMESPACE = "http://www.daisy.org/ns/2008/pef";
66 private static final String BRAILLE_DIGITS = "⠚⠁⠃⠉⠙⠑⠋⠛⠓⠊";
67
68 private static final Pattern HEADER = Pattern.compile("[\u2800\\s]+(⠼[" + BRAILLE_DIGITS + "]+|)([\u2800\\s]?⠤[\u2800\\s]?⠼[" + BRAILLE_DIGITS + "]+|)[\u2800\\s]+(⠼[" + BRAILLE_DIGITS + "]+|)");
69 private static final DateTimeFormatter DAY_MONTH_YEAR = DateTimeFormatter.ofPattern("d-M-Y");
70
71 private final MemoizingProvider<Query,FileFormat> fileFormatProvider;
72
73 private ReadablePipe source = null;
74 private WritablePipe result = null;
75
76 private MetadataStep(XProcRuntime runtime, XAtomicStep step, MemoizingProvider<Query,FileFormat> fileFormatProvider) {
77 super(runtime, step);
78 this.fileFormatProvider = fileFormatProvider;
79 }
80
81 @Override
82 public void setInput(String port, ReadablePipe pipe) {
83 source = pipe;
84 }
85
86 @Override
87 public void setOutput(String port, WritablePipe pipe) {
88 result = pipe;
89 }
90
91 @Override
92 public void reset() {
93 source.resetReader();
94 result.resetWriter();
95 }
96
97 @Override
98 public void run() throws SaxonApiException {
99 super.run();
100
101 try {
102
103 XdmNode pef = source.read();
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118 String identifier = getOption(_identifier, "");
119 String brfFileFormat = getOption(_brf_file_format, "");
120 String brfNamePattern = getOption(_brf_name_pattern, "");
121 int brfNumberWidth = getOption(_brf_number_width, 0);
122 String optionalDate = getOption(_optional_date, "");
123
124 String brfFileExtension = getFileExtension(brfFileFormat);
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152 Book book = parsePEF(pef);
153 String metadataXml = createMetadataXml(book, identifier, brfNamePattern, brfNumberWidth, brfFileExtension, optionalDate);
154 DocumentBuilder documentBuilder = runtime.getProcessor().newDocumentBuilder();
155 XdmNode metadata = documentBuilder.build(new StreamSource(new StringReader(metadataXml)));
156
157 result.write(metadata);
158
159 } catch (Exception e) {
160
161 logger.error("dedicon:metadata failed", e);
162 throw new XProcException(step.getNode(), e);
163
164 }
165 }
166
167 private String getFileExtension (String fileFormatQuery) {
168 Query.MutableQuery q = mutableQuery(query(fileFormatQuery));
169 Iterable<FileFormat> fileFormats = fileFormatProvider.get(q);
170 String fileExtension = "";
171 for (FileFormat fileFormat : fileFormats) {
172 fileExtension = fileFormat.getFileExtension();
173 break;
174 }
175 return fileExtension;
176 }
177
178 private Book parsePEF(XdmNode pef) {
179 Book book = new Book();
180 getChildren(pef, PEF_NAMESPACE, "pef").forEach(pefRoot -> {
181 getChildren(pefRoot, PEF_NAMESPACE, "body").forEach(pefBody -> {
182 getChildren(pefBody, PEF_NAMESPACE, "volume").forEach(pefVolume -> {
183 Volume volume = new Volume();
184 book.getVolumes().add(volume);
185 String pefDuplex = pefVolume.getAttributeValue(new QName("duplex"));
186 volume.setDuplex("true".equalsIgnoreCase(pefDuplex));
187 getChildren(pefVolume, PEF_NAMESPACE, "section").forEach(pefSection -> {
188 Section section = new Section();
189 volume.getSections().add(section);
190 getChildren(pefSection, PEF_NAMESPACE, "page").forEach(pefPage -> {
191 Page page = new Page();
192 setPageNumbers(page, pefPage);
193 section.getPages().add(page);
194 });
195 });
196 });
197 });
198 });
199 determineVolumeMetadata(book);
200 return book;
201 }
202
203 private void setPageNumbers(Page page, XdmNode pefPage) {
204 for(XdmNode pefRow : getChildren(pefPage, PEF_NAMESPACE, "row")) {
205 String header = pefRow.getStringValue();
206 setPageNumbers(page, header);
207 break;
208 }
209 }
210
211
212
213 private List<XdmNode> getChildren(XdmNode parent, String namespace, String child) {
214 List<XdmNode> children = new ArrayList<>();
215 XdmSequenceIterator iterator = parent.axisIterator(CHILD, new QName(namespace, child));
216 while (iterator.hasNext()) {
217 XdmItem item = iterator.next();
218 if (item instanceof XdmNode) {
219 children.add((XdmNode)item);
220 }
221 }
222 return children;
223 }
224
225 private void setPageNumbers(Page page, String header) {
226 Matcher pageNumbersMatcher = HEADER.matcher(header);
227 if (pageNumbersMatcher.find()) {
228 String fromPrintPageNumber = afterNumberSign(pageNumbersMatcher.group(1));
229 String untilPrintPageNumber = afterNumberSign(pageNumbersMatcher.group(2));
230 String pageNumber = afterNumberSign(pageNumbersMatcher.group(3));
231
232
233 if (fromPrintPageNumber.length() > 0 && untilPrintPageNumber.length() == 0) {
234 untilPrintPageNumber = fromPrintPageNumber;
235 }
236 if (fromPrintPageNumber.length() == 0 && untilPrintPageNumber.length() > 0) {
237 fromPrintPageNumber = untilPrintPageNumber;
238 }
239
240 page.setFromPrintPageNumber(getNumber(fromPrintPageNumber));
241 page.setUntilPrintPageNumber(getNumber(untilPrintPageNumber));
242 page.setPageNumber(getNumber(pageNumber));
243 }
244
245
246 }
247
248 private String afterNumberSign(String brailleNumber) {
249 int index = brailleNumber.indexOf('⠼');
250 if (index >= 0) {
251 return brailleNumber.substring(index + 1);
252 }
253
254 return "";
255 }
256
257 private Integer getNumber(String brailleNumber) {
258 if (brailleNumber.length() == 0) {
259 return null;
260 }
261
262 int number = 0;
263 for (int i = 0; i < brailleNumber.length(); i++) {
264 char brailleDigit = brailleNumber.charAt(i);
265 int digit = BRAILLE_DIGITS.indexOf(brailleDigit);
266 number = number*10 + digit;
267 }
268 return number;
269 }
270
271 private void determineVolumeMetadata(Book book) {
272 Volume previousVolume = null;
273 int expectedFirstPageNumber = 1;
274 for (Volume volume : book.getVolumes()) {
275 volume.setFirstPrintPageNumber(getFirstPrintPageNumber(volume));
276 volume.setLastPrintPageNumber(getLastPrintPageNumber(volume));
277 expectedFirstPageNumber = setPageNumbers(volume, expectedFirstPageNumber);
278
279 if (previousVolume != null) {
280 previousVolume.setLastPageNumber(volume.getFirstPageNumber() - 1);
281 }
282 previousVolume = volume;
283 }
284 }
285
286 private Integer getFirstPrintPageNumber(Volume volume) {
287 return volume.getSections().stream()
288 .map(Section::getPages)
289 .flatMap(Collection::stream)
290 .map(Page::getFromPrintPageNumber)
291 .filter(Objects::nonNull)
292 .findFirst()
293 .orElse(null);
294 }
295
296
297
298
299
300
301
302 private Integer getLastPrintPageNumber(Volume volume) {
303 return volume.getSections().stream()
304 .map(Section::getPages)
305 .flatMap(Collection::stream)
306 .map(Page::getUntilPrintPageNumber)
307 .filter(Objects::nonNull)
308 .sorted(Collections.reverseOrder())
309 .findFirst()
310 .orElse(null);
311 }
312
313
314
315
316
317
318
319
320 private int setPageNumbers(Volume volume, int expectedFirstPageNumber) {
321 volume.setFirstPageNumber(expectedFirstPageNumber);
322 volume.setLastPageNumber(expectedFirstPageNumber);
323 int lastPageNumber = expectedFirstPageNumber;
324 boolean pageNumberFound = false;
325 for (Section section : volume.getSections()) {
326 int pagesInThisSection = 0;
327 for (Page page : section.getPages()) {
328 if (page.getPageNumber() != null) {
329 if (!pageNumberFound) {
330
331 volume.setFirstPageNumber(volume.getFirstPageNumber() + page.getPageNumber() - lastPageNumber);
332 pageNumberFound = true;
333 }
334 lastPageNumber = page.getPageNumber();
335 }
336 volume.setLastPageNumber(lastPageNumber);
337 lastPageNumber ++;
338 pagesInThisSection ++;
339 }
340
341
342 if (volume.getDuplex()) {
343 lastPageNumber += pagesInThisSection % 2;
344 }
345 }
346
347 return lastPageNumber;
348 }
349
350 private String createMetadataXml(Book book, String identifier, String brfNamePattern, int brfNumberWidth, String brfFileExtension, String optionalDate) throws SaxonApiException {
351 String date = optionalDate;
352 if (date == null || date.length() == 0) {
353 date = LocalDate.now().format(DAY_MONTH_YEAR);
354 }
355 StringBuilder xml = new StringBuilder();
356 xml.append("<lois_id>").append(identifier).append("</lois_id>");
357 int volumeIndex = 0;
358 for (Volume volume : book.getVolumes()) {
359 volumeIndex ++;
360 xml.append("<volume>");
361 xml.append("<filename>").append(getFilename(volumeIndex, brfNamePattern, brfNumberWidth, brfFileExtension)).append("</filename>");
362 xml.append("<vtype>br</vtype>");
363 xml.append("<volumenumber>").append(volumeIndex).append("</volumenumber>");
364 xml.append("<fromip>");
365 if (volume.getFirstPrintPageNumber() != null) {
366 xml.append(volume.getFirstPrintPageNumber());
367 }
368 xml.append("</fromip>");
369 xml.append("<tillip>");
370 if (volume.getLastPrintPageNumber() != null) {
371 xml.append(volume.getLastPrintPageNumber());
372 }
373 xml.append("</tillip>");
374 xml.append("<ippages>");
375 if (volume.getFirstPrintPageNumber() != null && volume.getLastPrintPageNumber() != null) {
376 xml.append(volume.getLastPrintPageNumber() - volume.getFirstPrintPageNumber() + 1);
377 }
378 xml.append("</ippages>");
379 xml.append("<fromcp>");
380 if (volume.getFirstPageNumber() != null) {
381 xml.append(volume.getFirstPageNumber());
382 }
383 xml.append("</fromcp>");
384 xml.append("<tillcp>");
385 if (volume.getLastPageNumber() != null) {
386 xml.append(volume.getLastPageNumber());
387 }
388 xml.append("</tillcp>");
389 xml.append("<amount>");
390 if (volume.getFirstPageNumber() != null && volume.getLastPageNumber() != null) {
391 xml.append(volume.getLastPageNumber() - volume.getFirstPageNumber() + 1);
392 }
393 xml.append("</amount>");
394 xml.append("<last>").append(volumeIndex == book.getVolumes().size() ? "Y" : "N").append("</last>");
395 xml.append("<vreadydate>").append(date).append("</vreadydate>");
396 xml.append("</volume>");
397 }
398 String document = "<document>".concat(xml.toString()).concat("</document>");
399 return document;
400 }
401
402 private String getFilename(int volumeIndex, String brfNamePattern, int brfNumberWidth, String brfFileExtension) {
403 String brfNumber = String.valueOf(volumeIndex);
404 while (brfNumber.length() < brfNumberWidth) {
405 brfNumber = "0" + brfNumber;
406 }
407 return brfNamePattern.replace("{}", brfNumber) + brfFileExtension;
408 }
409
410 @Component(
411 name = "dedicon:metadata",
412 service = {XProcStepProvider.class},
413 property = {"type:String={http://www.dedicon.nl}metadata"}
414 )
415 public static class Provider implements XProcStepProvider {
416
417 private List<FileFormatProvider> fileFormatProviders = new ArrayList<>();
418 private MemoizingProvider<Query,FileFormat> fileFormatProvider = memoize(dispatch(fileFormatProviders));
419
420 @Override
421 public XProcStep newStep(XProcRuntime runtime, XAtomicStep step) {
422 return new MetadataStep(runtime, step, fileFormatProvider);
423 }
424
425 @Reference(
426 name = "FileFormatProvider",
427 unbind = "unbindFileFormatProvider",
428 service = FileFormatProvider.class,
429 cardinality = ReferenceCardinality.MULTIPLE,
430 policy = ReferencePolicy.DYNAMIC
431 )
432 protected void bindFileFormatProvider(FileFormatProvider provider) {
433 fileFormatProviders.add(provider);
434 }
435
436 protected void unbindFileFormatProvider(FileFormatProvider provider) {
437 fileFormatProviders.remove(provider);
438 this.fileFormatProvider.invalidateCache();
439 }
440 }
441 }