View Javadoc

1   /*
2    * $Id: ZsciiConverter.java,v 1.11 2005/10/20 23:36:25 weiju Exp $
3    * 
4    * Created on 2005/09/23
5    * Copyright 2005 by Wei-ju Wu
6    *
7    * This file is part of The Z-machine Preservation Project (ZMPP).
8    *
9    * ZMPP is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU General Public License as published by
11   * the Free Software Foundation; either version 2 of the License, or
12   * (at your option) any later version.
13   *
14   * ZMPP is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU General Public License for more details.
18   *
19   * You should have received a copy of the GNU General Public License
20   * along with ZMPP; if not, write to the Free Software
21   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
22   */
23  package org.zmpp.vmutil;
24  
25  import java.util.ArrayList;
26  import java.util.List;
27  
28  import org.zmpp.base.MemoryReadAccess;
29  
30  /***
31   * This class provides conversion for the ZSCII character encoding into
32   * the Java character system.
33   * 
34   * @author Wei-ju Wu
35   * @version 1.0
36   */
37  public class ZsciiConverter {
38  
39    public static final byte CHAR_0 = 0x00; // Space = 32
40    public static final byte CHAR_1 = 0x01; // Newline = 13
41    public static final byte SHIFT_2 = 0x02; // Shift 1
42    public static final byte SHIFT_3 = 0x03; // Shift 2
43    public static final byte SHIFT_4 = 0x04; // Shift lock 1
44    public static final byte SHIFT_5 = 0x05; // Shift lock 2
45    public static final byte CHAR_6 = 0x06; //
46    
47    public static final String A0CHARS = "abcdefghijklmnopqrstuvwxyz";
48    public static final String A1CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
49    public static final String A2CHARS = " \n0123456789.,!?_#'\"///-:()";
50    
51    /***
52     * This interface defines the abstract access to an abbreviations
53     * table in memory, this will be used for decoding if needed. 
54     */
55    public interface AbbreviationsTable {
56    
57      int getWordAddress(int entryNum);
58    }
59    
60    /***
61     * Defines the possible alphabets here.
62     */
63    public enum Alphabet {  A0, A1, A2 }
64    
65    /***
66     * The story file version this converter works on.
67     */
68    private int version;
69    
70    /***
71     * The abbreviations table used for decoding.
72     */
73    private AbbreviationsTable abbreviations;
74    
75    /***
76     * Constructor.
77     * @param version Story file version
78     * @param abbreviations the abbreviations table used for decoding
79     */
80    public ZsciiConverter(int version, AbbreviationsTable abbreviations) {
81      
82      this.version = version;
83      this.abbreviations = abbreviations;
84    }
85    
86    /***
87     * Returns the initial alphabet of this converter.
88     * 
89     * @return the initial alphabet
90     */
91    public Alphabet getInitialAlphabet() {
92      
93      return Alphabet.A0;
94    }
95    
96    /***
97     * Returns the story file version.
98     * 
99     * @return the story file version
100    */
101   public int getVersion() {
102     
103     return version;
104   }
105 
106   /***
107    * Performs a ZSCII to Unicode conversion at the specified position of
108    * the given memory object.
109    * 
110    * @param memaccess a MemoryReadAccess object
111    * @param address the address of a ZSCII string
112    * @return a converted Unicode string 
113    */
114   public String convert(MemoryReadAccess memaccess, int address) {
115     
116     StringBuilder builder = new StringBuilder();
117     Alphabet currentAlphabet = getInitialAlphabet();
118     
119     byte[] zbytes = extractZbytes(memaccess, address);
120     byte zchar;
121     for (int i = 0; i < zbytes.length; i++) {
122       
123       zchar = zbytes[i];
124       if (isShiftCharacter(zchar)) {
125         
126         currentAlphabet = ZsciiConverter.shiftFrom(currentAlphabet, zchar);
127         //System.out.printf("i: %d c: %x shift\n", i, zchar);
128         
129       } else if (isAbbreviation(zchar)) {
130         
131         if (i < (zbytes.length - 1)) { // this happens in Zork I
132           
133           i++; // retrieve the next byte to determine the abbreviation
134         
135           // the abbreviations table could be null, simply skip that part in this
136           // case
137           if (abbreviations != null) {
138             //System.out.println("abbrev");
139             int x = zbytes[i];
140             int entryNum = 32 * (zchar - 1) + x;
141             int entryAddress = abbreviations.getWordAddress(entryNum);
142             String abbrev = convert(memaccess, entryAddress);
143             builder.append(abbrev);
144             currentAlphabet = getInitialAlphabet();
145             //System.out.printf("i: %d c: %x abbr: [%s]\n", i, zchar, abbrev);
146           }
147         } else {
148           
149           //System.out.printf("strange abbreviation: %s, char is: %d\n",
150           //    builder.toString(), (int) zchar);
151         }
152         
153       } else if (isSwitchTo10Bit(currentAlphabet, zchar)) {
154         
155         decode10BitZchar(builder, zbytes[i + 1], zbytes[i + 2]);
156         i += 2; // skip the three characters read (including the loop increment)
157         currentAlphabet = getInitialAlphabet();
158         //System.out.printf("i: %d 10bit: %x\n", i, ((zbytes[i+1] << 8) & zbytes[i+2]) );
159         
160       } else {
161         
162         decodeZchar(builder, currentAlphabet, zchar);
163         currentAlphabet = getInitialAlphabet();
164         /*
165         // debug
166         if (isPrintable(zchar)) {
167           System.out.printf("i: %d code: %x char: '%c'\n", i, zchar, decode(currentAlphabet, zchar));
168         } else {
169           
170           System.out.printf("i: %d (not printable)\n", i, zchar);
171         }*/
172       }
173     }
174     return builder.toString();
175   }
176 
177   /***
178    * Decodes the given byte value to the specified buffer using the working
179    * alphabet.
180    * 
181    * @param builder the buffer to print into
182    * @param alphabet the working alphabet
183    * @param b a z character, needs to be a non-shift character
184    */
185   public static void decodeZchar(StringBuilder builder, Alphabet alphabet,
186                                  byte b) {
187     
188     if (isPrintable(b)) {
189       
190       builder.append(decode(alphabet, b));
191     }
192   }
193   
194   /***
195    * Decodes a printable character to a unicode character. 
196    *
197    * @param alphabet the work alphabet
198    * @param zchar the ZSCII character to decode
199    * @return the Unicode character
200    */
201   public static char decode(Alphabet alphabet, short zchar) {
202     
203     if (zchar == 0) return ' ';
204     if (isAsciiCharacter((byte) zchar)) {
205       
206       return (char) zchar;
207       
208     } else if (isZsciiCharacter((byte) zchar)) {
209       switch (alphabet) {
210     
211         case A0:
212           return A0CHARS.charAt(zchar - 6);
213         case A1:
214           return A1CHARS.charAt(zchar - 6);
215         case A2:        
216           return A2CHARS.charAt(zchar - 6);
217         default:
218           break;
219       }
220     }
221     return 0;
222   }
223   
224   /***
225    * Returns a new alphabet for a given shift character. If the character
226    * is not a shift character, the old alphabet is returned.
227    * 
228    * @param alphabet the start alphabet
229    * @param shiftChar the shift character
230    * @return the shifted alphabet
231    */
232   public static Alphabet shiftFrom(Alphabet alphabet, byte shiftChar) {
233     
234     switch (shiftChar) {
235       case SHIFT_2:
236       case SHIFT_4:
237       
238         if (alphabet == Alphabet.A0) {
239         
240           return Alphabet.A1;
241         
242         } else if (alphabet == Alphabet.A1) {
243         
244           return Alphabet.A2;
245         
246         } else if (alphabet == Alphabet.A2) {
247         
248           return Alphabet.A0;
249         }
250         break;
251       case SHIFT_3:
252       case SHIFT_5:
253       
254         if (alphabet == Alphabet.A0) {
255         
256           return Alphabet.A2;
257         
258         } else if (alphabet == Alphabet.A1) {
259         
260           return Alphabet.A0;
261           
262         } else if (alphabet == Alphabet.A2) {
263         
264           return Alphabet.A1;
265         }
266         break;
267       default:        
268     }
269     return alphabet;
270   }
271 
272   // ***********************************************************************
273   // ******* Private
274   // *****************************
275   /***
276    * Determines if the specified character marks a abbreviation. 
277    * 
278    * @param zchar the zchar
279    * @return true if abbreviation, false, otherwise
280    */
281   private static boolean isAbbreviation(short zchar) {
282     
283     return 1 <= zchar && zchar <= 3;
284   }
285 
286 
287   /***
288    * Determines the last word in a z sequence. The last word has the
289    * MSB set.
290    * 
291    * @param zword the zword
292    * @return true if zword is the last word, false, otherwise
293    */
294   private static boolean isEndWord(short zword) {
295     
296     return (zword & 0x8000) > 0;
297   }
298   
299   /***
300    * Determines if the given parameter is a ZSCII shift character.
301    * 
302    * @param zchar a byte value
303    * @return true, if the parameter is a shift, false, otherwise
304    */
305   private static boolean isShiftCharacter(byte zchar) {
306     
307     return SHIFT_4 <= zchar && zchar <= SHIFT_5;
308   }
309   
310   /***
311    * Determines if the given byte falls in the ASCII range.
312    * 
313    * @param zchar a byte value
314    * @return true, if the value falls in the ASCII range, false, else
315    */
316   private static boolean isAsciiCharacter(byte zchar) {
317     
318     return 32 <= zchar && zchar <= 126;
319   }
320   
321   /***
322    * Determines if the given byte value falls within the ZSCII range.
323    * 
324    * @param zchar the zchar value
325    * @return true if the value is in the ZSCII range, false, otherwise
326    */
327   private static boolean isZsciiCharacter(byte zchar) {
328     
329     return 6 <= zchar && zchar <= 31;
330   }
331   
332   
333   /***
334    * Returns true if the zchar parameter represents a printable character.
335    * 
336    * @param zchar a ZSCII character
337    * @return true if printable, false, otherwise
338    */
339   private static boolean isPrintable(byte zchar) {
340     
341     return !isShiftCharacter(zchar);
342   }
343   
344   /***
345    * Returns true if the specified zchar indicates the next 10 bits as
346    * one character.
347    * 
348    * @param zchar a zchar
349    * @return true if the next 2 zchars should be treated as one, false, else
350    */
351   private static boolean isSwitchTo10Bit(Alphabet alphabet, byte zchar) {
352    
353     return alphabet == Alphabet.A2 && zchar == 6;
354   }
355 
356   /***
357    * This function unfortunately generates a List object on each invocation,
358    * the advantage is that it will return all the characters of the Z string.
359    *  
360    * @param memaccess the memory access object
361    * @param address the address of the z string
362    * @return the z characters of the string
363    */
364   private static byte[] extractZbytes(MemoryReadAccess memaccess,
365                                        int address) {
366     
367     short zword = 0;
368     int currentAddr = address;
369     List<byte[]> byteList = new ArrayList<byte[]>();
370     
371     do {
372       zword = memaccess.readShort(currentAddr);
373       byteList.add(extractBytes(zword));
374       currentAddr += 2; // increment pointer      
375     } while (!isEndWord(zword));
376     
377     byte[] result = new byte[byteList.size() * 3];
378     int i = 0;
379     for (byte[] triplet : byteList) {
380       for (byte b : triplet) {
381         result[i++] = b;
382       }
383     }
384     return result;
385   }
386   
387   /***
388    * Extracts three 5 bit fields from the given 16 bit word and returns
389    * an array of three bytes containing these characters.
390    * 
391    * @param zword a 16 bit word
392    * @return an array of three bytes containing the three 5-bit ZSCII characters
393    * encoded in the word
394    */
395   private static byte[] extractBytes(short zword) {
396     
397     byte[] result = new byte[3];
398     result[2] = (byte) (zword & 0x1f);
399     result[1] = (byte) ((zword >> 5) & 0x1f);
400     result[0] = (byte) ((zword >> 10) & 0x1f);
401     return result;
402   }
403   
404   /***
405    * Decodes a 10 bit zchar, the current implementation simply treats it
406    * as an ASCII.
407    * 
408    * @param builder the StringBuilder to write to
409    * @param top the byte holding the top 5 bit of the zchar
410    * @param bottom the byte holding the bottom 5 bit of the zchar
411    */  
412   private static void decode10BitZchar(StringBuilder builder,
413                                        byte top, byte bottom) {
414     
415     short zchar = (short) (top << 5 | bottom);
416     builder.append((char) zchar);
417   }
418 }