1   package nl.dedicon.pipeline.braille.step;
2   
3   import static nl.dedicon.pipeline.braille.step.CapitalMode.letter;
4   import static nl.dedicon.pipeline.braille.step.CapitalMode.lowercase;
5   import static nl.dedicon.pipeline.braille.step.CapitalMode.permanent;
6   import static nl.dedicon.pipeline.braille.step.CapitalMode.word;
7   import static nl.dedicon.pipeline.braille.step.NumericMode.decimalSign;
8   import static nl.dedicon.pipeline.braille.step.NumericMode.moneyZeros;
9   import static nl.dedicon.pipeline.braille.step.NumericMode.no;
10  import static nl.dedicon.pipeline.braille.step.NumericMode.start;
11  import static nl.dedicon.pipeline.braille.step.NumericMode.yes;
12  import org.w3c.dom.Document;
13  import org.w3c.dom.Node;
14  
15  /**
16   * Braille to Text
17   *
18   * @author Paul Rambags
19   */
20  public class BrailleToText {
21  
22      private static final char
23              BRAILLE_CAPITAL = 0x2828,           // '⠨'
24              BRAILLE_CAPITAL_PERMANENT = 0x2818, // '⠘'
25              BRAILLE_DIGIT = 0x283C,             // '⠼'
26              BRAILLE_HYPHEN = 0x2824,            // '⠤'
27              BRAILLE_RESTORE = 0x2820,           // '⠠'
28              BRAILLE_SPACE = 0x2800,             // '⠀'
29              SPACE = ' ';
30      
31      private CapitalMode capitalMode = lowercase;
32      private NumericMode numericMode = no;
33      private boolean endsWithHyphen = false;
34  
35      public void convert (Document document) {
36          Node body = Utils.getChild(document.getDocumentElement(), "body");
37          Node main = Utils.getFirstChildWithAttribute(body, "div", "id", "main");
38          while (main != null) {
39              Node volume = Utils.getFirstChildWithAttribute(main, "div", "class", "volume");
40              while (volume != null) {
41                  Node page = Utils.getFirstChildWithAttribute(volume, "div", "class", "page");
42                  while (page != null) {
43                      Node braillePage = Utils.getFirstChildWithAttribute(page, "div", "class", "braille-page");
44                      Node textPage = Utils.getFirstChildWithAttribute(page, "div", "class", "text-page");
45                      Node brailleRow = Utils.getFirstChildWithAttribute(braillePage, "div", "class", "row");
46                      Node textRow = Utils.getFirstChildWithAttribute(textPage, "div", "class", "row");
47                      while (brailleRow != null && textRow != null) {
48                          String braille = brailleRow.getTextContent();
49                          String text = convert(braille);
50                          textRow.setTextContent(text);
51  
52                          brailleRow = Utils.getNextSiblingWithAttribute(brailleRow, "div", "class", "row");
53                          textRow = Utils.getNextSiblingWithAttribute(textRow, "div", "class", "row");
54                      }
55                      page = Utils.getNextSiblingWithAttribute(page, "div", "class", "page");
56                  }
57                  volume = Utils.getNextSiblingWithAttribute(volume, "div", "class", "volume");
58              }
59              main = Utils.getNextSiblingWithAttribute(main, "div", "id", "main");
60          }
61      }
62      
63      // @todo Houd rekening met meerdere betekenissen van braille symbolen b.v. (123456) = é of %
64      private String convert (String braille) {
65          StringBuilder text = new StringBuilder();
66          
67          for (char b : braille.toCharArray()) {
68              
69              // text character
70              char t;
71              if (numericMode != no) {
72                  t = Utils.convertBrailleNumeric(b);
73              } else {
74                  t = Utils.convertBraille(b);
75                  if (capitalMode != lowercase) {
76                      t = String.valueOf(t).toUpperCase().charAt(0);
77                  }
78              }
79              text.append(t);
80  
81              // numeric mode
82              if (b == BRAILLE_DIGIT) {
83                  numericMode = start;
84              } else {
85                  switch (numericMode) {
86                      case start:
87                          if (Utils.isBrailleMinus(b) || Utils.isBrailleDigit(b)) {
88                              numericMode = yes;
89                          }
90                          break;
91                      case yes:
92                          if (Utils.isBrailleDecimalSeparator(b)) {
93                              numericMode = decimalSign;
94                          } else if (!Utils.isBrailleDigit(b)) {
95                              numericMode = no;
96                          }
97                          break;
98                      case decimalSign:
99                          if (Utils.isBrailleDigit(b)) {
100                             numericMode = yes;
101                         } else if (Utils.isBrailleMoneyZeros(b)) {
102                             numericMode = moneyZeros;
103                         } else {
104                             numericMode = no;
105                         }
106                         break;
107                     case moneyZeros:
108                         if (!Utils.isBrailleMoneyZeros(b)) {
109                             numericMode = no;
110                         }
111                         break;
112                     case no:
113                     default:
114                         break;
115                 }
116             }
117             
118             // capital mode
119             if (b == BRAILLE_CAPITAL) {
120                 capitalMode = letter;
121             } else if (b == BRAILLE_CAPITAL_PERMANENT) {
122                 switch(capitalMode) {
123                     case lowercase: capitalMode = word;
124                     case letter: capitalMode = word;
125                     case word: capitalMode = permanent;
126                     case permanent: capitalMode = word;
127                 }
128             } else if (b == BRAILLE_RESTORE) {
129                 numericMode = no;
130                 capitalMode = lowercase;
131             } else if (isSpace(b)) {
132                 numericMode = no;
133                 if (capitalMode != permanent) {
134                     capitalMode = lowercase;
135                 }
136             } else if (capitalMode == letter) {
137                 capitalMode = lowercase;
138             }
139 
140             // ends with hyphen
141             if (b == BRAILLE_HYPHEN) {
142                 endsWithHyphen = true;
143             } else if (!isSpace(b)) {
144                 endsWithHyphen = false;
145             }
146         }
147         
148         // end of line
149         numericMode = no;
150         if (!endsWithHyphen) {
151             if (capitalMode != permanent) {
152                 capitalMode = lowercase;
153             }
154         }
155         
156         return text.toString();
157     }
158     
159     private static boolean isSpace(char b) {
160         return b == BRAILLE_SPACE || b == SPACE;
161     }
162 }