1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.zmpp.vmutil;
24
25 import java.util.ArrayList;
26 import java.util.List;
27
28 import org.zmpp.base.MemoryReadAccess;
29
30 /***
31 * This class provides conversion for the ZSCII character encoding into
32 * the Java character system.
33 *
34 * @author Wei-ju Wu
35 * @version 1.0
36 */
37 public class ZsciiConverter {
38
39 public static final byte CHAR_0 = 0x00;
40 public static final byte CHAR_1 = 0x01;
41 public static final byte SHIFT_2 = 0x02;
42 public static final byte SHIFT_3 = 0x03;
43 public static final byte SHIFT_4 = 0x04;
44 public static final byte SHIFT_5 = 0x05;
45 public static final byte CHAR_6 = 0x06;
46
47 public static final String A0CHARS = "abcdefghijklmnopqrstuvwxyz";
48 public static final String A1CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
49 public static final String A2CHARS = " \n0123456789.,!?_#'\"///-:()";
50
51 /***
52 * This interface defines the abstract access to an abbreviations
53 * table in memory, this will be used for decoding if needed.
54 */
55 public interface AbbreviationsTable {
56
57 int getWordAddress(int entryNum);
58 }
59
60 /***
61 * Defines the possible alphabets here.
62 */
63 public enum Alphabet { A0, A1, A2 }
64
65 /***
66 * The story file version this converter works on.
67 */
68 private int version;
69
70 /***
71 * The abbreviations table used for decoding.
72 */
73 private AbbreviationsTable abbreviations;
74
75 /***
76 * Constructor.
77 * @param version Story file version
78 * @param abbreviations the abbreviations table used for decoding
79 */
80 public ZsciiConverter(int version, AbbreviationsTable abbreviations) {
81
82 this.version = version;
83 this.abbreviations = abbreviations;
84 }
85
86 /***
87 * Returns the initial alphabet of this converter.
88 *
89 * @return the initial alphabet
90 */
91 public Alphabet getInitialAlphabet() {
92
93 return Alphabet.A0;
94 }
95
96 /***
97 * Returns the story file version.
98 *
99 * @return the story file version
100 */
101 public int getVersion() {
102
103 return version;
104 }
105
106 /***
107 * Performs a ZSCII to Unicode conversion at the specified position of
108 * the given memory object.
109 *
110 * @param memaccess a MemoryReadAccess object
111 * @param address the address of a ZSCII string
112 * @return a converted Unicode string
113 */
114 public String convert(MemoryReadAccess memaccess, int address) {
115
116 StringBuilder builder = new StringBuilder();
117 Alphabet currentAlphabet = getInitialAlphabet();
118
119 byte[] zbytes = extractZbytes(memaccess, address);
120 byte zchar;
121 for (int i = 0; i < zbytes.length; i++) {
122
123 zchar = zbytes[i];
124 if (isShiftCharacter(zchar)) {
125
126 currentAlphabet = ZsciiConverter.shiftFrom(currentAlphabet, zchar);
127
128
129 } else if (isAbbreviation(zchar)) {
130
131 if (i < (zbytes.length - 1)) {
132
133 i++;
134
135
136
137 if (abbreviations != null) {
138
139 int x = zbytes[i];
140 int entryNum = 32 * (zchar - 1) + x;
141 int entryAddress = abbreviations.getWordAddress(entryNum);
142 String abbrev = convert(memaccess, entryAddress);
143 builder.append(abbrev);
144 currentAlphabet = getInitialAlphabet();
145
146 }
147 } else {
148
149
150
151 }
152
153 } else if (isSwitchTo10Bit(currentAlphabet, zchar)) {
154
155 decode10BitZchar(builder, zbytes[i + 1], zbytes[i + 2]);
156 i += 2;
157 currentAlphabet = getInitialAlphabet();
158
159
160 } else {
161
162 decodeZchar(builder, currentAlphabet, zchar);
163 currentAlphabet = getInitialAlphabet();
164
165
166
167
168
169
170
171
172 }
173 }
174 return builder.toString();
175 }
176
177 /***
178 * Decodes the given byte value to the specified buffer using the working
179 * alphabet.
180 *
181 * @param builder the buffer to print into
182 * @param alphabet the working alphabet
183 * @param b a z character, needs to be a non-shift character
184 */
185 public static void decodeZchar(StringBuilder builder, Alphabet alphabet,
186 byte b) {
187
188 if (isPrintable(b)) {
189
190 builder.append(decode(alphabet, b));
191 }
192 }
193
194 /***
195 * Decodes a printable character to a unicode character.
196 *
197 * @param alphabet the work alphabet
198 * @param zchar the ZSCII character to decode
199 * @return the Unicode character
200 */
201 public static char decode(Alphabet alphabet, short zchar) {
202
203 if (zchar == 0) return ' ';
204 if (isAsciiCharacter((byte) zchar)) {
205
206 return (char) zchar;
207
208 } else if (isZsciiCharacter((byte) zchar)) {
209 switch (alphabet) {
210
211 case A0:
212 return A0CHARS.charAt(zchar - 6);
213 case A1:
214 return A1CHARS.charAt(zchar - 6);
215 case A2:
216 return A2CHARS.charAt(zchar - 6);
217 default:
218 break;
219 }
220 }
221 return 0;
222 }
223
224 /***
225 * Returns a new alphabet for a given shift character. If the character
226 * is not a shift character, the old alphabet is returned.
227 *
228 * @param alphabet the start alphabet
229 * @param shiftChar the shift character
230 * @return the shifted alphabet
231 */
232 public static Alphabet shiftFrom(Alphabet alphabet, byte shiftChar) {
233
234 switch (shiftChar) {
235 case SHIFT_2:
236 case SHIFT_4:
237
238 if (alphabet == Alphabet.A0) {
239
240 return Alphabet.A1;
241
242 } else if (alphabet == Alphabet.A1) {
243
244 return Alphabet.A2;
245
246 } else if (alphabet == Alphabet.A2) {
247
248 return Alphabet.A0;
249 }
250 break;
251 case SHIFT_3:
252 case SHIFT_5:
253
254 if (alphabet == Alphabet.A0) {
255
256 return Alphabet.A2;
257
258 } else if (alphabet == Alphabet.A1) {
259
260 return Alphabet.A0;
261
262 } else if (alphabet == Alphabet.A2) {
263
264 return Alphabet.A1;
265 }
266 break;
267 default:
268 }
269 return alphabet;
270 }
271
272
273
274
275 /***
276 * Determines if the specified character marks a abbreviation.
277 *
278 * @param zchar the zchar
279 * @return true if abbreviation, false, otherwise
280 */
281 private static boolean isAbbreviation(short zchar) {
282
283 return 1 <= zchar && zchar <= 3;
284 }
285
286
287 /***
288 * Determines the last word in a z sequence. The last word has the
289 * MSB set.
290 *
291 * @param zword the zword
292 * @return true if zword is the last word, false, otherwise
293 */
294 private static boolean isEndWord(short zword) {
295
296 return (zword & 0x8000) > 0;
297 }
298
299 /***
300 * Determines if the given parameter is a ZSCII shift character.
301 *
302 * @param zchar a byte value
303 * @return true, if the parameter is a shift, false, otherwise
304 */
305 private static boolean isShiftCharacter(byte zchar) {
306
307 return SHIFT_4 <= zchar && zchar <= SHIFT_5;
308 }
309
310 /***
311 * Determines if the given byte falls in the ASCII range.
312 *
313 * @param zchar a byte value
314 * @return true, if the value falls in the ASCII range, false, else
315 */
316 private static boolean isAsciiCharacter(byte zchar) {
317
318 return 32 <= zchar && zchar <= 126;
319 }
320
321 /***
322 * Determines if the given byte value falls within the ZSCII range.
323 *
324 * @param zchar the zchar value
325 * @return true if the value is in the ZSCII range, false, otherwise
326 */
327 private static boolean isZsciiCharacter(byte zchar) {
328
329 return 6 <= zchar && zchar <= 31;
330 }
331
332
333 /***
334 * Returns true if the zchar parameter represents a printable character.
335 *
336 * @param zchar a ZSCII character
337 * @return true if printable, false, otherwise
338 */
339 private static boolean isPrintable(byte zchar) {
340
341 return !isShiftCharacter(zchar);
342 }
343
344 /***
345 * Returns true if the specified zchar indicates the next 10 bits as
346 * one character.
347 *
348 * @param zchar a zchar
349 * @return true if the next 2 zchars should be treated as one, false, else
350 */
351 private static boolean isSwitchTo10Bit(Alphabet alphabet, byte zchar) {
352
353 return alphabet == Alphabet.A2 && zchar == 6;
354 }
355
356 /***
357 * This function unfortunately generates a List object on each invocation,
358 * the advantage is that it will return all the characters of the Z string.
359 *
360 * @param memaccess the memory access object
361 * @param address the address of the z string
362 * @return the z characters of the string
363 */
364 private static byte[] extractZbytes(MemoryReadAccess memaccess,
365 int address) {
366
367 short zword = 0;
368 int currentAddr = address;
369 List<byte[]> byteList = new ArrayList<byte[]>();
370
371 do {
372 zword = memaccess.readShort(currentAddr);
373 byteList.add(extractBytes(zword));
374 currentAddr += 2;
375 } while (!isEndWord(zword));
376
377 byte[] result = new byte[byteList.size() * 3];
378 int i = 0;
379 for (byte[] triplet : byteList) {
380 for (byte b : triplet) {
381 result[i++] = b;
382 }
383 }
384 return result;
385 }
386
387 /***
388 * Extracts three 5 bit fields from the given 16 bit word and returns
389 * an array of three bytes containing these characters.
390 *
391 * @param zword a 16 bit word
392 * @return an array of three bytes containing the three 5-bit ZSCII characters
393 * encoded in the word
394 */
395 private static byte[] extractBytes(short zword) {
396
397 byte[] result = new byte[3];
398 result[2] = (byte) (zword & 0x1f);
399 result[1] = (byte) ((zword >> 5) & 0x1f);
400 result[0] = (byte) ((zword >> 10) & 0x1f);
401 return result;
402 }
403
404 /***
405 * Decodes a 10 bit zchar, the current implementation simply treats it
406 * as an ASCII.
407 *
408 * @param builder the StringBuilder to write to
409 * @param top the byte holding the top 5 bit of the zchar
410 * @param bottom the byte holding the bottom 5 bit of the zchar
411 */
412 private static void decode10BitZchar(StringBuilder builder,
413 byte top, byte bottom) {
414
415 short zchar = (short) (top << 5 | bottom);
416 builder.append((char) zchar);
417 }
418 }