1 package nl.dedicon.pipeline.braille.step;
2
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.HashMap;
6 import java.util.HashSet;
7 import java.util.List;
8 import java.util.Map;
9 import java.util.Set;
10 import net.sf.saxon.s9api.Axis;
11 import net.sf.saxon.s9api.QName;
12 import net.sf.saxon.s9api.XdmNode;
13 import net.sf.saxon.s9api.XdmSequenceIterator;
14 import nl.dedicon.pipeline.braille.model.Context;
15 import nl.dedicon.pipeline.braille.model.Replace;
16 import nl.dedicon.pipeline.braille.model.Symbol;
17 import org.apache.commons.lang3.StringUtils;
18 import org.w3c.dom.Document;
19 import org.w3c.dom.Element;
20 import org.w3c.dom.Node;
21 import static nl.dedicon.pipeline.braille.step.NumericMode.decimalSign;
22 import static nl.dedicon.pipeline.braille.step.NumericMode.moneyZeros;
23 import static nl.dedicon.pipeline.braille.step.NumericMode.no;
24 import static nl.dedicon.pipeline.braille.step.NumericMode.yes;
25
26
27
28
29
30
31
32
33 public class SymbolsReplacer {
34 private static final QName _addleadingspace = new QName("addleadingspace");
35 private static final QName _addtrailingspace = new QName("addtrailingspace");
36 private static final QName _braille = new QName("braille");
37 private static final QName _char = new QName("char");
38 private static final QName _close = new QName("close");
39 private static final QName _context = new QName("context");
40 private static final QName _description = new QName("description");
41 private static final QName _language = new QName("language");
42 private static final QName _open = new QName("open");
43 private static final QName _removeleadingspace = new QName("removeleadingspace");
44 private static final QName _removetrailingspace = new QName("removetrailingspace");
45 private static final QName _replace = new QName("replace");
46 private static final QName _symbol = new QName("symbol");
47 private static final QName _symbols = new QName("symbols");
48
49 private final Map<String, Symbol> symbolsMap;
50 private final Integer[] symbolLengthsDescending;
51 private final Set<Replace> symbolsListReplaces = new HashSet<>();
52
53
54
55
56
57
58 public SymbolsReplacer(XdmNode symbolsCodeNode) {
59 this.symbolsMap = filterSymbols(symbolsCodeNode);
60 this.symbolLengthsDescending = determineSymbolLengths(this.symbolsMap);
61 }
62
63
64
65
66
67
68
69 private static Map<String, Symbol> filterSymbols(XdmNode symbolsCodeNode) {
70 Map<String, Symbol> symbolsMap = new HashMap<>();
71
72 XdmSequenceIterator symbolsIterator = symbolsCodeNode.axisIterator(Axis.CHILD, _symbols);
73 while (symbolsIterator.hasNext()) {
74 XdmNode symbolsNode = (XdmNode)symbolsIterator.next();
75 XdmSequenceIterator symbolIterator = symbolsNode.axisIterator(Axis.CHILD, _symbol);
76 while (symbolIterator.hasNext()) {
77
78 XdmNode symbolNode = (XdmNode)symbolIterator.next();
79 String character = Utils.getValue(symbolNode, _char);
80 String language = Utils.getValue(symbolNode, _language);
81
82 List<Replace> replaces = new ArrayList<>();
83 XdmSequenceIterator replaceIterator = symbolNode.axisIterator(Axis.CHILD, _replace);
84 while (replaceIterator.hasNext()) {
85
86 XdmNode replaceNode = (XdmNode)replaceIterator.next();
87 Context context = Context.get(replaceNode.getAttributeValue(_context));
88 String braille = Utils.getValue(replaceNode, _braille);
89 String description = Utils.getChildNode(replaceNode, _description).getStringValue();
90
91 if (context != null && StringUtils.isNotBlank(braille)) {
92
93 XdmNode brailleNode = Utils.getChildNode(replaceNode, _braille);
94 boolean brailleAddLeadingSpace = StringUtils.isNotBlank(brailleNode.getAttributeValue(_addleadingspace));
95 boolean brailleRemoveLeadingSpace = StringUtils.isNotBlank(brailleNode.getAttributeValue(_removeleadingspace));
96 boolean brailleAddTrailingSpace = StringUtils.isNotBlank(brailleNode.getAttributeValue(_addtrailingspace));
97 boolean brailleRemoveTrailingSpace = StringUtils.isNotBlank(brailleNode.getAttributeValue(_removetrailingspace));
98 String brailleOpen = brailleNode.getAttributeValue(_open);
99 String brailleClose = brailleNode.getAttributeValue(_close);
100
101 Replace replace = new Replace();
102 replace.setContext(context);
103 replace.setBraille(DediconBrl.convert(braille));
104 replace.setBrailleAddLeadingSpace(brailleAddLeadingSpace);
105 replace.setBrailleRemoveLeadingSpace(brailleRemoveLeadingSpace);
106 replace.setBrailleAddTrailingSpace(brailleAddTrailingSpace);
107 replace.setBrailleRemoveTrailingSpace(brailleRemoveTrailingSpace);
108 replace.setBrailleOpen(brailleOpen);
109 replace.setBrailleClose(brailleClose);
110 replace.setDescription(description);
111
112 replaces.add(replace);
113
114 }
115 }
116
117 if (StringUtils.isNotBlank(character) && !replaces.isEmpty()) {
118
119 Symbol symbol = new Symbol();
120 symbol.setCharacter(character);
121 symbol.setLanguage(language);
122 symbol.setReplaces(replaces);
123 replaces.stream().forEach(r -> r.setParent(symbol));
124 symbolsMap.put(entirelyUppercaseOrLowercase(character), symbol);
125 }
126 }
127 }
128 return symbolsMap;
129 }
130
131
132
133
134
135
136
137 private static Integer[] determineSymbolLengths(Map<String, Symbol> symbolsMap) {
138 Set<Integer> symbolLengths = new HashSet<>();
139 symbolsMap.keySet()
140 .stream()
141 .map(String::length)
142 .forEach(symbolLengths::add);
143 Integer[] symbolLengthsDescending = symbolLengths.toArray(new Integer[symbolLengths.size()]);
144
145 Arrays.sort(symbolLengthsDescending, (i,j) -> j.compareTo(i));
146 return symbolLengthsDescending;
147 }
148
149
150
151
152
153
154 protected Set<Replace> getSymbolsListReplaces() {
155 return symbolsListReplaces;
156 }
157
158 private String replace(final String source, final Context context) {
159 if (source == null) {
160 return null;
161 }
162
163 NumericMode numericMode = no;
164
165 String target = source;
166 int index = 0;
167 while (index < target.length()) {
168
169
170 char c = target.charAt(index);
171 if (Utils.isDigit(c)) {
172 numericMode = yes;
173 } else {
174 switch (numericMode) {
175 case yes:
176 if (Utils.isDecimalSeparator(c)) {
177 numericMode = decimalSign;
178 } else if (!Utils.isDigit(c)) {
179 numericMode = no;
180 }
181 break;
182 case decimalSign:
183 if (Utils.isDigit(c)) {
184 numericMode = yes;
185 } else if (Utils.isMoneyZeros(c)) {
186 numericMode = moneyZeros;
187 } else {
188 numericMode = no;
189 }
190 break;
191 case moneyZeros:
192 if (!Utils.isMoneyZeros(c)) {
193 numericMode = no;
194 }
195 break;
196 case no:
197 default:
198 break;
199 }
200 }
201
202
203 if (numericMode == no) {
204 for (Integer symbolLength : symbolLengthsDescending) {
205 if (index + symbolLength > target.length()) {
206 continue;
207 }
208 String substring = target.substring(index, index + symbolLength);
209 Replace replaceBook = determineReplace(substring, context, ReplaceTarget.book);
210 if (replaceBook == null && context != Context.Default) {
211 replaceBook = determineReplace(substring, Context.Default, ReplaceTarget.book);
212 }
213 if (replaceBook == null) {
214 continue;
215 }
216
217
218
219
220
221 numericMode = no;
222
223 Replace replaceSymbolsList = determineReplace(substring, context, ReplaceTarget.symbolsList);
224 if (replaceSymbolsList == null && context != Context.Default) {
225 replaceSymbolsList = determineReplace(substring, Context.Default, ReplaceTarget.symbolsList);
226 }
227 if (replaceSymbolsList != null && StringUtils.isNotBlank(replaceSymbolsList.getDescription())) {
228 symbolsListReplaces.add(replaceSymbolsList);
229 }
230
231 String before = target.substring(0, index);
232 String braille = replaceBook.getBraille();
233 String after = target.substring(index + symbolLength);
234
235 if (StringUtils.isNotBlank(replaceBook.getBrailleOpen()) || StringUtils.isNotBlank(replaceBook.getBrailleClose())) {
236 int endIndex = StringUtils.indexOf(after, 'û');
237 if (startsWithBlank(after) && endIndex > 1) {
238
239 after = StringUtils.join(
240 replaceBook.getBrailleOpen(),
241 after.substring(1, endIndex),
242 replaceBook.getBrailleClose(),
243 after.substring(endIndex + 1)
244 );
245 } else {
246
247 while (startsWithBlank(after)) {
248 after = after.substring(1);
249 }
250 endIndex = 0;
251 while (endIndex < after.length() && StringUtils.isNotBlank(after.substring(endIndex, endIndex + 1))) {
252 endIndex ++;
253 }
254 after = StringUtils.join(
255 replaceBook.getBrailleOpen(),
256 after.substring(0, endIndex),
257 replaceBook.getBrailleClose(),
258 after.substring(endIndex)
259 );
260 }
261 }
262 if (replaceBook.getBrailleAddLeadingSpace() && !endsWithBlank(before)) {
263 before = before.concat(" ");
264 index ++;
265 }
266 if (replaceBook.getBrailleRemoveLeadingSpace()) {
267 while (endsWithBlank(before)) {
268 before = StringUtils.chop(before);
269 index --;
270 }
271 }
272 if (replaceBook.getBrailleAddTrailingSpace() && !startsWithBlank(after)) {
273 after = " ".concat(after);
274 index ++;
275 }
276 if (replaceBook.getBrailleRemoveTrailingSpace()) {
277 while (startsWithBlank(after)) {
278 after = after.substring(1);
279 }
280 }
281
282 target = StringUtils.join(before, braille, after);
283
284 index += braille.length() - 1;
285 break;
286 }
287 }
288 index++;
289 }
290
291 return target;
292 }
293
294 private Replace determineReplace(String substring, Context context, ReplaceTarget replaceTarget) {
295 String key = entirelyUppercaseOrLowercase(substring);
296 Symbol symbol = null;
297 switch (replaceTarget) {
298 case book:
299 if (isUpperCase(key)) {
300 symbol = symbolsMap.get(key);
301 if (symbol == null) {
302 symbol = symbolsMap.get(key.toLowerCase());
303 }
304 } else {
305 symbol = symbolsMap.get(key);
306 }
307 break;
308 case symbolsList:
309 if (isUpperCase(key)) {
310 symbol = symbolsMap.get(key.toLowerCase());
311 if (symbol == null) {
312 symbol = symbolsMap.get(key);
313 }
314 } else {
315 symbol = symbolsMap.get(key);
316 }
317 break;
318 }
319 if (symbol == null) {
320 return null;
321 }
322 return symbol.getReplaces()
323 .stream()
324 .filter(r -> r.getContext() == context)
325 .findFirst()
326 .orElse(null)
327 ;
328 }
329
330
331
332
333
334
335 public void replaceSymbols(Node node) {
336 if (node.getNodeType() == Node.TEXT_NODE) {
337 String text = node.getTextContent();
338 if (text != null && text.length() > 0) {
339 Context context = determineContext(node);
340 String replacement = replace(text, context);
341 node.setTextContent(replacement);
342 }
343 } else {
344 for (Node childNode = node.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
345
346 replaceSymbols(childNode);
347 }
348 }
349 }
350
351
352 private Context determineContext(Node textNode) {
353 Node parentNode = textNode.getParentNode();
354 if (parentNode != null) {
355 String attributeValue = ((Element)parentNode).getAttribute("class");
356 if ("dummy-formula".equals(attributeValue)) {
357 return Context.Formula;
358 }
359 }
360 return Context.Default;
361 }
362
363
364
365
366
367
368
369 public void insertSymbolsList(Document document, String header) {
370
371 String headerWithSymbolsReplaced = replace(header, Context.Default);
372
373 Element dtbook = document.getDocumentElement();
374 Node book = Utils.getChild(dtbook, "book");
375
376 if (book == null || getSymbolsListReplaces().isEmpty()) {
377 return;
378 }
379
380 Node frontMatter = Utils.getChild(book, "frontmatter");
381 if (frontMatter == null) {
382 frontMatter = Utils.addChild(book, "frontmatter");
383 }
384 Element level1 = Utils.addChildBefore(frontMatter, frontMatter.getFirstChild(), "level1");
385 level1.setAttribute("class", "symbols_list");
386
387
388 Element pagenum = Utils.addChild(level1, "pagenum");
389 pagenum.setAttribute("id", "page-symbolslist");
390
391 if (StringUtils.isNotBlank(headerWithSymbolsReplaced)) {
392 Element h1 = Utils.addChild(level1, "h1");
393 h1.setTextContent(headerWithSymbolsReplaced);
394 }
395
396 Element list = Utils.addChild(level1, "list");
397 list.setAttribute("type", "pl");
398
399 getSymbolsListReplaces().stream()
400 .sorted((r1, r2) -> r1.getParent().getCharacter().compareTo(r2.getParent().getCharacter()))
401 .forEachOrdered(r -> {
402 String text = String.format("\u283F%s\u00A0 %s", r.getBraille(), r.getDescription());
403 Element li = Utils.addChild(list, "li");
404 li.setTextContent(text);
405 });
406 }
407
408 private static boolean startsWithBlank(String s) {
409 return StringUtils.length(s) > 0 && StringUtils.isBlank(StringUtils.left(s, 1));
410 }
411
412 private static boolean endsWithBlank(String s) {
413 return StringUtils.length(s) > 0 && StringUtils.isBlank(StringUtils.right(s, 1));
414 }
415
416
417 private static String entirelyUppercaseOrLowercase(String symbol) {
418 if (symbol == null) {
419 return null;
420 }
421 return symbol.toUpperCase().equals(symbol) ? symbol : symbol.toLowerCase();
422 }
423
424
425 private static boolean isUpperCase(String entirelyUpperCaseOrLowercaseString) {
426 if (entirelyUpperCaseOrLowercaseString == null) {
427 return false;
428 }
429 return !entirelyUpperCaseOrLowercaseString.toLowerCase().equals(entirelyUpperCaseOrLowercaseString);
430 }
431
432 private enum ReplaceTarget {
433 book,
434 symbolsList
435 }
436 }