Skip to content

Incorrect decoding of non-ASCII characters in response #6

@GoogleCodeExporter

Description

@GoogleCodeExporter
Reading bytes one-by-one and casting them to char won't work for non-ASCII 
characters. 

Here is a patch which fixes this:

Index: src/main/java/org/xeustechnologies/googleapi/spelling/SpellChecker.java
===================================================================
--- src/main/java/org/xeustechnologies/googleapi/spelling/SpellChecker.java (revision 17)
+++ src/main/java/org/xeustechnologies/googleapi/spelling/SpellChecker.java (working copy)
@@ -22,10 +22,12 @@
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.InetSocketAddress;
 import java.net.Proxy;
 import java.net.URL;
 import java.net.URLConnection;
+import java.nio.charset.Charset;

 import javax.xml.bind.JAXBContext;
 import javax.xml.bind.JAXBException;
@@ -98,20 +100,16 @@
             if( logger.isDebugEnabled() )
                 logger.debug( new String( requestData ) );

-            int c = 0;
-            StringBuffer buff = new StringBuffer();
-            BufferedInputStream responseStream = new BufferedInputStream( conn.getInputStream() );
+            InputStream responseStream = conn.getInputStream();

-            while(( c = responseStream.read() ) != -1) {
-                buff.append( (char) c );
-            }
+            byte[] buff = toByteArray(responseStream);

             responseStream.close();

             if( logger.isDebugEnabled() )
-                logger.debug( buff );
+                logger.debug( new String(buff, Charset.forName("UTF-8")) );

-            return unmarshall( buff.toString().getBytes() );
+            return unmarshall( buff );

         } catch (Exception e) {
             logger.error( e, e );
@@ -167,6 +165,19 @@
         return uri.toString();
     }

+   // could add dependency to Apache Commons, 
+   // but may not be worth it for one method
+   private static byte[] toByteArray(InputStream in) throws IOException {
+       int BUFFER_SIZE = 4096;
+       ByteArrayOutputStream out = new ByteArrayOutputStream();
+       byte[] buffer = new byte[BUFFER_SIZE];
+       int read = 0;
+       while ((read = in.read(buffer)) != -1) {
+           out.write(buffer, 0, read);
+       }
+       return out.toByteArray();
+   }
+
     public SpellChecker(Language language) {
         this.language = language;
     }
Index: src/main/java/org/xeustechnologies/googleapi/spelling/Language.java
===================================================================
--- src/main/java/org/xeustechnologies/googleapi/spelling/Language.java (revision 17)
+++ src/main/java/org/xeustechnologies/googleapi/spelling/Language.java (working copy)
@@ -23,7 +23,7 @@
  */
 public enum Language {
     DANISH("da"), GERMAN("de"), ENGLISH("en"), SPANISH("es"), FINNISH("fi"), FRENCH("fr"), ITALIAN("it"), DUTCH("nl"), POLISH(
-            "pl"), PORTUGUESE("pt"), SWEDISH("sv");
+            "pl"), PORTUGUESE("pt"), SWEDISH("sv"), RUSSIAN("ru");

     private final String code;


Original issue reported on code.google.com by alexey.v...@gmail.com on 8 Mar 2013 at 11:09

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions