1   package nl.dedicon.pipeline.braille.step;
2   
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import net.sf.saxon.s9api.Axis;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmSequenceIterator;
import nl.dedicon.pipeline.braille.model.Context;
import nl.dedicon.pipeline.braille.model.Replace;
import nl.dedicon.pipeline.braille.model.Symbol;
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import static nl.dedicon.pipeline.braille.step.NumericMode.decimalSign;
import static nl.dedicon.pipeline.braille.step.NumericMode.moneyZeros;
import static nl.dedicon.pipeline.braille.step.NumericMode.no;
import static nl.dedicon.pipeline.braille.step.NumericMode.yes;
25  
26  
27  /**
28   * Replace symbols in a DTBook and insert a symbols list
29   * Based on W3C DOM
30   * 
31   * @author Paul Rambags
32   */
33  public class SymbolsReplacer  {
34      private static final QName _addleadingspace = new QName("addleadingspace");
35      private static final QName _addtrailingspace = new QName("addtrailingspace");
36      private static final QName _braille = new QName("braille");
37      private static final QName _char = new QName("char");
38      private static final QName _close = new QName("close");
39      private static final QName _context = new QName("context");
40      private static final QName _description = new QName("description");
41      private static final QName _language = new QName("language");
42      private static final QName _open = new QName("open");
43      private static final QName _removeleadingspace = new QName("removeleadingspace");
44      private static final QName _removetrailingspace = new QName("removetrailingspace");
45      private static final QName _replace = new QName("replace");
46      private static final QName _symbol = new QName("symbol");
47      private static final QName _symbols = new QName("symbols");
48  
49      private final Map<String, Symbol> symbolsMap;
50      private final Integer[] symbolLengthsDescending;
51      private final Set<Replace> symbolsListReplaces = new HashSet<>();
52      
53      /**
54       * Constructor
55       * 
56       * @param symbolsCodeNode root document of the symbols code XML
57       */
58      public SymbolsReplacer(XdmNode symbolsCodeNode) {
59          this.symbolsMap = filterSymbols(symbolsCodeNode);
60          this.symbolLengthsDescending = determineSymbolLengths(this.symbolsMap);
61      }
62      
63      /**
64       * Generate a Symbols HashMap
65       * 
66       * @param symbolsCodeNode SymbolsCode root document of the symbols code file
67       * @return Character -> Symbol map
68       */
69      private static Map<String, Symbol> filterSymbols(XdmNode symbolsCodeNode) {
70          Map<String, Symbol> symbolsMap = new HashMap<>();
71  
72          XdmSequenceIterator symbolsIterator = symbolsCodeNode.axisIterator(Axis.CHILD, _symbols);
73          while (symbolsIterator.hasNext()) {
74              XdmNode symbolsNode = (XdmNode)symbolsIterator.next();
75              XdmSequenceIterator symbolIterator = symbolsNode.axisIterator(Axis.CHILD, _symbol);
76              while (symbolIterator.hasNext()) {
77  
78                  XdmNode symbolNode = (XdmNode)symbolIterator.next();
79                  String character = Utils.getValue(symbolNode, _char);
80                  String language = Utils.getValue(symbolNode, _language);
81  
82                  List<Replace> replaces = new ArrayList<>();
83                  XdmSequenceIterator replaceIterator = symbolNode.axisIterator(Axis.CHILD, _replace);
84                  while (replaceIterator.hasNext()) {
85  
86                      XdmNode replaceNode = (XdmNode)replaceIterator.next();
87                      Context context = Context.get(replaceNode.getAttributeValue(_context));
88                      String braille = Utils.getValue(replaceNode, _braille);
89                      String description = Utils.getChildNode(replaceNode, _description).getStringValue();
90  
91                      if (context != null && StringUtils.isNotBlank(braille)) {
92  
93                          XdmNode brailleNode = Utils.getChildNode(replaceNode, _braille);
94                          boolean brailleAddLeadingSpace = StringUtils.isNotBlank(brailleNode.getAttributeValue(_addleadingspace));
95                          boolean brailleRemoveLeadingSpace = StringUtils.isNotBlank(brailleNode.getAttributeValue(_removeleadingspace));
96                          boolean brailleAddTrailingSpace = StringUtils.isNotBlank(brailleNode.getAttributeValue(_addtrailingspace));
97                          boolean brailleRemoveTrailingSpace = StringUtils.isNotBlank(brailleNode.getAttributeValue(_removetrailingspace));
98                          String brailleOpen = brailleNode.getAttributeValue(_open);
99                          String brailleClose = brailleNode.getAttributeValue(_close);
100 
101                         Replace replace = new Replace();
102                         replace.setContext(context);
103                         replace.setBraille(DediconBrl.convert(braille));
104                         replace.setBrailleAddLeadingSpace(brailleAddLeadingSpace);
105                         replace.setBrailleRemoveLeadingSpace(brailleRemoveLeadingSpace);
106                         replace.setBrailleAddTrailingSpace(brailleAddTrailingSpace);
107                         replace.setBrailleRemoveTrailingSpace(brailleRemoveTrailingSpace);
108                         replace.setBrailleOpen(brailleOpen);
109                         replace.setBrailleClose(brailleClose);
110                         replace.setDescription(description);
111                         
112                         replaces.add(replace);
113 
114                     }
115                 }
116 
117                 if (StringUtils.isNotBlank(character) && !replaces.isEmpty()) {
118 
119                     Symbol symbol = new Symbol();
120                     symbol.setCharacter(character);
121                     symbol.setLanguage(language);
122                     symbol.setReplaces(replaces);
123                     replaces.stream().forEach(r -> r.setParent(symbol));
124                     symbolsMap.put(entirelyUppercaseOrLowercase(character), symbol);
125                 }
126             }
127         }
128         return symbolsMap;
129     }
130 
131     /**
132      * Determine the different symbol lengths in descending order
133      * 
134      * @param symbolsMap symbols map
135      * @return symbol lengths in descending order
136      */
137     private static Integer[] determineSymbolLengths(Map<String, Symbol> symbolsMap) {
138         Set<Integer> symbolLengths = new HashSet<>();
139         symbolsMap.keySet()
140                 .stream()
141                 .map(String::length)
142                 .forEach(symbolLengths::add);
143         Integer[] symbolLengthsDescending = symbolLengths.toArray(new Integer[symbolLengths.size()]);
144         // sort descending
145         Arrays.sort(symbolLengthsDescending, (i,j) -> j.compareTo(i));
146         return symbolLengthsDescending;
147     }
148 
149     /**
150      * Get the set of replaces for the symbol list of this DTBook
151      * 
152      * @return set of replaces
153      */
154     protected Set<Replace> getSymbolsListReplaces() {
155         return symbolsListReplaces;
156     }
157 
158     private String replace(final String source, final Context context) {
159         if (source == null) {
160             return null;
161         }
162         
163         NumericMode numericMode = no;
164         
165         String target = source;
166         int index = 0;
167         while (index < target.length()) {
168             
169             // numeric mode
170             char c = target.charAt(index);
171             if (Utils.isDigit(c)) {
172                 numericMode = yes;
173             } else {
174                 switch (numericMode) {
175                     case yes:
176                         if (Utils.isDecimalSeparator(c)) {
177                             numericMode = decimalSign;
178                         } else if (!Utils.isDigit(c)) {
179                             numericMode = no;
180                         }
181                         break;
182                     case decimalSign:
183                         if (Utils.isDigit(c)) {
184                             numericMode = yes;
185                         } else if (Utils.isMoneyZeros(c)) {
186                             numericMode = moneyZeros;
187                         } else {
188                             numericMode = no;
189                         }
190                         break;
191                     case moneyZeros:
192                         if (!Utils.isMoneyZeros(c)) {
193                             numericMode = no;
194                         }
195                         break;
196                     case no:
197                     default:
198                         break;
199                 }
200             }
201             
202             // do not replace symbols in numeric mode
203             if (numericMode == no) {
204                 for (Integer symbolLength : symbolLengthsDescending) {
205                     if (index + symbolLength > target.length()) {
206                         continue;
207                     }
208                     String substring = target.substring(index, index + symbolLength);
209                     Replace replaceBook = determineReplace(substring, context, ReplaceTarget.book);
210                     if (replaceBook == null && context != Context.Default) {
211                         replaceBook = determineReplace(substring, Context.Default, ReplaceTarget.book);
212                     }
213                     if (replaceBook == null) {
214                         continue;
215                     }
216 
217                     //
218                     // Replace found
219                     //
220 
221                     numericMode = no;
222 
223                     Replace replaceSymbolsList = determineReplace(substring, context, ReplaceTarget.symbolsList);
224                     if (replaceSymbolsList == null && context != Context.Default) {
225                         replaceSymbolsList = determineReplace(substring, Context.Default, ReplaceTarget.symbolsList);
226                     }
227                     if (replaceSymbolsList != null && StringUtils.isNotBlank(replaceSymbolsList.getDescription())) {
228                         symbolsListReplaces.add(replaceSymbolsList);
229                     }
230 
231                     String before = target.substring(0, index);
232                     String braille = replaceBook.getBraille();
233                     String after = target.substring(index + symbolLength);
234 
235                     if (StringUtils.isNotBlank(replaceBook.getBrailleOpen()) || StringUtils.isNotBlank(replaceBook.getBrailleClose())) {
236                         int endIndex = StringUtils.indexOf(after, 'û');
237                         if (startsWithBlank(after) && endIndex > 1) {
238                             // replace leading white space and replace û
239                             after = StringUtils.join(
240                                     replaceBook.getBrailleOpen(),
241                                     after.substring(1, endIndex),
242                                     replaceBook.getBrailleClose(),
243                                     after.substring(endIndex + 1)
244                             );
245                         } else {
246                             // remove leading white space and find first non-whitespace
247                             while (startsWithBlank(after)) {
248                                 after = after.substring(1);
249                             }
250                             endIndex = 0;
251                             while (endIndex < after.length() && StringUtils.isNotBlank(after.substring(endIndex, endIndex + 1))) {
252                                 endIndex ++;
253                             }
254                             after = StringUtils.join(
255                                     replaceBook.getBrailleOpen(),
256                                     after.substring(0, endIndex),
257                                     replaceBook.getBrailleClose(),
258                                     after.substring(endIndex)
259                             );
260                         }
261                     }
262                     if (replaceBook.getBrailleAddLeadingSpace() && !endsWithBlank(before)) {
263                         before = before.concat(" ");
264                         index ++;
265                     }
266                     if (replaceBook.getBrailleRemoveLeadingSpace()) {
267                         while (endsWithBlank(before)) {
268                             before = StringUtils.chop(before);
269                             index --;
270                         }
271                     }
272                     if (replaceBook.getBrailleAddTrailingSpace() && !startsWithBlank(after)) {
273                         after = " ".concat(after);
274                         index ++;
275                     }
276                     if (replaceBook.getBrailleRemoveTrailingSpace()) {
277                         while (startsWithBlank(after)) {
278                             after = after.substring(1);
279                         }
280                     }
281 
282                     target = StringUtils.join(before, braille, after);
283 
284                     index += braille.length() - 1;
285                     break;
286                 }
287             }
288             index++;
289         }
290 
291         return target;
292     }
293 
294     private Replace determineReplace(String substring, Context context, ReplaceTarget replaceTarget) {
295         String key = entirelyUppercaseOrLowercase(substring);
296         Symbol symbol = null;
297         switch (replaceTarget) {
298             case book:
299                 if (isUpperCase(key)) {
300                     symbol = symbolsMap.get(key);
301                     if (symbol == null) {
302                         symbol = symbolsMap.get(key.toLowerCase());
303                     }
304                 } else {
305                     symbol = symbolsMap.get(key);
306                 }
307                 break;
308             case symbolsList:
309                 if (isUpperCase(key)) {
310                     symbol = symbolsMap.get(key.toLowerCase());
311                     if (symbol == null) {
312                         symbol = symbolsMap.get(key);
313                     }
314                 } else {
315                     symbol = symbolsMap.get(key);
316                 }
317                 break;
318         }
319         if (symbol == null) {
320             return null;
321         }
322         return symbol.getReplaces()
323                 .stream()
324                 .filter(r -> r.getContext() == context)
325                 .findFirst()
326                 .orElse(null)
327                 ;
328     }
329     
330     /**
331      * Recursively replace all symbols in text nodes with their braille representation
332      * 
333      * @param node DTBook node
334      */
335     public void replaceSymbols(Node node) {
336         if (node.getNodeType() == Node.TEXT_NODE) {
337             String text = node.getTextContent();
338             if (text != null && text.length() > 0) {
339                 Context context = determineContext(node);
340                 String replacement = replace(text, context);
341                 node.setTextContent(replacement);
342             }
343         } else {
344             for (Node childNode = node.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
345                 // recursion
346                 replaceSymbols(childNode);
347             }
348         }
349     }
350     
351     // @todo Code
352     private Context determineContext(Node textNode) {
353         Node parentNode = textNode.getParentNode();
354         if (parentNode != null) {
355             String attributeValue = ((Element)parentNode).getAttribute("class");
356             if ("dummy-formula".equals(attributeValue)) {
357                 return Context.Formula;
358             }
359         }
360         return Context.Default;
361     }
362 
363     /**
364      * Inserts the symbols list in a DTBook
365      * 
366      * @param document DTBook
367      * @param header symbols list header
368      */
369     public void insertSymbolsList(Document document, String header) {
370         // the header can contain symbols, too
371         String headerWithSymbolsReplaced = replace(header, Context.Default);
372 
373         Element dtbook = document.getDocumentElement();
374         Node book = Utils.getChild(dtbook, "book");
375 
376         if (book == null || getSymbolsListReplaces().isEmpty()) {
377             return;
378         }
379         
380         Node frontMatter = Utils.getChild(book, "frontmatter");
381         if (frontMatter == null) {
382             frontMatter = Utils.addChild(book, "frontmatter");
383         }
384         Element level1 = Utils.addChildBefore(frontMatter, frontMatter.getFirstChild(), "level1");
385         level1.setAttribute("class", "symbols_list");
386         
387         // empty page number
388         Element pagenum = Utils.addChild(level1, "pagenum");
389         pagenum.setAttribute("id", "page-symbolslist");
390 
391         if (StringUtils.isNotBlank(headerWithSymbolsReplaced)) {
392             Element h1 = Utils.addChild(level1, "h1");
393             h1.setTextContent(headerWithSymbolsReplaced);
394         }
395         
396         Element list = Utils.addChild(level1, "list");
397         list.setAttribute("type", "pl");
398         
399         getSymbolsListReplaces().stream()
400                 .sorted((r1, r2) -> r1.getParent().getCharacter().compareTo(r2.getParent().getCharacter()))
401                 .forEachOrdered(r -> {
402                     String text = String.format("\u283F%s\u00A0 %s", r.getBraille(), r.getDescription());
403                     Element li = Utils.addChild(list, "li");
404                     li.setTextContent(text);
405                 });
406     }
407     
408     private static boolean startsWithBlank(String s) {
409         return StringUtils.length(s) > 0 && StringUtils.isBlank(StringUtils.left(s, 1));
410     }
411 
412     private static boolean endsWithBlank(String s) {
413         return StringUtils.length(s) > 0 && StringUtils.isBlank(StringUtils.right(s, 1));
414     }
415     
416     // a key in the symbolsMap must be entirely uppercase or lowercase
417     private static String entirelyUppercaseOrLowercase(String symbol) {
418         if (symbol == null) {
419             return null;
420         }
421         return symbol.toUpperCase().equals(symbol) ? symbol : symbol.toLowerCase();
422     }
423     
424     // determine for an entirely uppercase or lowercase string, whether it is in uppercase
425     private static boolean isUpperCase(String entirelyUpperCaseOrLowercaseString) {
426         if (entirelyUpperCaseOrLowercaseString == null) {
427             return false;
428         }
429         return !entirelyUpperCaseOrLowercaseString.toLowerCase().equals(entirelyUpperCaseOrLowercaseString);
430     }
431     
432     private enum ReplaceTarget {
433         book,
434         symbolsList
435     }
436 }