@@ -124,7 +124,8 @@ protected String guessLanguage(HotEntry entry) {
124124 }
125125
126126 static class Handler extends DefaultHandler {
127- StringBuilder content = new StringBuilder ();
127+ final StringBuilder content = new StringBuilder ();
128+ String contentType ;
128129
129130 final Stack <String > elements = new Stack <>();
130131
@@ -143,12 +144,15 @@ public void startElement(String uri, String localName, String qName, Attributes
143144 "language:" + language + ";" +
144145 "cregit-version:" + CREGIT_VERSION );
145146 } else {
146- out .println ("begin_" + qName );
147+ out .print ("begin_" + qName + " \n " );
147148 }
148149 }
149150
150151 if (content .length () > 0 ) {
151- out .println (content );
152+ String trimmed = content .toString ().trim ().replace ('\n' , ' ' ).replace ("\r " , "" );
153+ if (!trimmed .isEmpty ()) {
154+ out .print (contentType + "|" + trimmed + "\n " );
155+ }
152156 content .setLength (0 );
153157 }
154158 elements .push (qName );
@@ -157,21 +161,24 @@ public void startElement(String uri, String localName, String qName, Attributes
// endElement (diff view): flushes any buffered character data, pops the
// element stack, and emits an "end_<qName>" marker when at most one element
// remains on the stack after the pop.
// Lines marked "-" are the previous implementation; lines marked "+" replace them.
157161 @ Override
158162 public void endElement (String uri , String localName , String qName ) {
159163 if (content .length () > 0 ) {
// Old behaviour: the buffer was printed verbatim with println.
160- out .println (content );
// New behaviour: normalise the whole buffer once at flush time -- trim it,
// turn newlines into spaces, and drop carriage returns.
// NOTE(review): the "\r " and "\n " literals are shown with an inner space in
// this view; token spacing here looks mangled, so confirm the exact literals
// against the real file.
164+ String trimmed = content .toString ().trim ().replace ('\n' , ' ' ).replace ("\r " , "" );
// Whitespace-only buffers produce no output line.
165+ if (!trimmed .isEmpty ()) {
// The prefix comes from the separate contentType field rather than from the buffer itself.
166+ out .print (contentType + "|" + trimmed + "\n " );
167+ }
// The buffer is cleared whether or not anything was printed.
161168 content .setLength (0 );
162169 }
163170 elements .pop ();
// Size is checked after the pop, so the marker is only written for elements
// closing at stack depth <= 1.
164171 if (elements .size () <= 1 ) {
165- out .println ("end_" + qName );
// print with an explicit newline replaces println -- presumably to avoid the
// platform-dependent line separator; TODO confirm intent.
172+ out .print ("end_" + qName + " \n " );
166173 }
167174 }
168175
169176 @ Override
170177 public void characters (char [] ch , int start , int length ) {
171- String s = new String (ch , start , length ). trim (). replace ( '\n' , ' ' ) ;
178+ String s = new String (ch , start , length );
172179 if (!s .isEmpty ()) {
173180 if (content .length () == 0 ) {
174- content . append ( elements .peek ()). append ( "|" );
181+ contentType = elements .peek ();
175182 }
176183 content .append (s );
177184 }
0 commit comments