1 package nl.dedicon.pipeline.braille.step;
2
3 import static nl.dedicon.pipeline.braille.step.CapitalMode.letter;
4 import static nl.dedicon.pipeline.braille.step.CapitalMode.lowercase;
5 import static nl.dedicon.pipeline.braille.step.CapitalMode.permanent;
6 import static nl.dedicon.pipeline.braille.step.CapitalMode.word;
7 import static nl.dedicon.pipeline.braille.step.NumericMode.decimalSign;
8 import static nl.dedicon.pipeline.braille.step.NumericMode.moneyZeros;
9 import static nl.dedicon.pipeline.braille.step.NumericMode.no;
10 import static nl.dedicon.pipeline.braille.step.NumericMode.start;
11 import static nl.dedicon.pipeline.braille.step.NumericMode.yes;
12 import org.w3c.dom.Document;
13 import org.w3c.dom.Node;
14
15
16
17
18
19
20 public class BrailleToText {
21
22 private static final char
23 BRAILLE_CAPITAL = 0x2828,
24 BRAILLE_CAPITAL_PERMANENT = 0x2818,
25 BRAILLE_DIGIT = 0x283C,
26 BRAILLE_HYPHEN = 0x2824,
27 BRAILLE_RESTORE = 0x2820,
28 BRAILLE_SPACE = 0x2800,
29 SPACE = ' ';
30
31 private CapitalMode capitalMode = lowercase;
32 private NumericMode numericMode = no;
33 private boolean endsWithHyphen = false;
34
35 public void convert (Document document) {
36 Node body = Utils.getChild(document.getDocumentElement(), "body");
37 Node main = Utils.getFirstChildWithAttribute(body, "div", "id", "main");
38 while (main != null) {
39 Node volume = Utils.getFirstChildWithAttribute(main, "div", "class", "volume");
40 while (volume != null) {
41 Node page = Utils.getFirstChildWithAttribute(volume, "div", "class", "page");
42 while (page != null) {
43 Node braillePage = Utils.getFirstChildWithAttribute(page, "div", "class", "braille-page");
44 Node textPage = Utils.getFirstChildWithAttribute(page, "div", "class", "text-page");
45 Node brailleRow = Utils.getFirstChildWithAttribute(braillePage, "div", "class", "row");
46 Node textRow = Utils.getFirstChildWithAttribute(textPage, "div", "class", "row");
47 while (brailleRow != null && textRow != null) {
48 String braille = brailleRow.getTextContent();
49 String text = convert(braille);
50 textRow.setTextContent(text);
51
52 brailleRow = Utils.getNextSiblingWithAttribute(brailleRow, "div", "class", "row");
53 textRow = Utils.getNextSiblingWithAttribute(textRow, "div", "class", "row");
54 }
55 page = Utils.getNextSiblingWithAttribute(page, "div", "class", "page");
56 }
57 volume = Utils.getNextSiblingWithAttribute(volume, "div", "class", "volume");
58 }
59 main = Utils.getNextSiblingWithAttribute(main, "div", "id", "main");
60 }
61 }
62
63
64 private String convert (String braille) {
65 StringBuilder text = new StringBuilder();
66
67 for (char b : braille.toCharArray()) {
68
69
70 char t;
71 if (numericMode != no) {
72 t = Utils.convertBrailleNumeric(b);
73 } else {
74 t = Utils.convertBraille(b);
75 if (capitalMode != lowercase) {
76 t = String.valueOf(t).toUpperCase().charAt(0);
77 }
78 }
79 text.append(t);
80
81
82 if (b == BRAILLE_DIGIT) {
83 numericMode = start;
84 } else {
85 switch (numericMode) {
86 case start:
87 if (Utils.isBrailleMinus(b) || Utils.isBrailleDigit(b)) {
88 numericMode = yes;
89 }
90 break;
91 case yes:
92 if (Utils.isBrailleDecimalSeparator(b)) {
93 numericMode = decimalSign;
94 } else if (!Utils.isBrailleDigit(b)) {
95 numericMode = no;
96 }
97 break;
98 case decimalSign:
99 if (Utils.isBrailleDigit(b)) {
100 numericMode = yes;
101 } else if (Utils.isBrailleMoneyZeros(b)) {
102 numericMode = moneyZeros;
103 } else {
104 numericMode = no;
105 }
106 break;
107 case moneyZeros:
108 if (!Utils.isBrailleMoneyZeros(b)) {
109 numericMode = no;
110 }
111 break;
112 case no:
113 default:
114 break;
115 }
116 }
117
118
119 if (b == BRAILLE_CAPITAL) {
120 capitalMode = letter;
121 } else if (b == BRAILLE_CAPITAL_PERMANENT) {
122 switch(capitalMode) {
123 case lowercase: capitalMode = word;
124 case letter: capitalMode = word;
125 case word: capitalMode = permanent;
126 case permanent: capitalMode = word;
127 }
128 } else if (b == BRAILLE_RESTORE) {
129 numericMode = no;
130 capitalMode = lowercase;
131 } else if (isSpace(b)) {
132 numericMode = no;
133 if (capitalMode != permanent) {
134 capitalMode = lowercase;
135 }
136 } else if (capitalMode == letter) {
137 capitalMode = lowercase;
138 }
139
140
141 if (b == BRAILLE_HYPHEN) {
142 endsWithHyphen = true;
143 } else if (!isSpace(b)) {
144 endsWithHyphen = false;
145 }
146 }
147
148
149 numericMode = no;
150 if (!endsWithHyphen) {
151 if (capitalMode != permanent) {
152 capitalMode = lowercase;
153 }
154 }
155
156 return text.toString();
157 }
158
159 private static boolean isSpace(char b) {
160 return b == BRAILLE_SPACE || b == SPACE;
161 }
162 }