001 /*
002 * Copyright 2003-2008 the original author or authors.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * You are receiving this code free of charge, which represents many hours of
017 * effort from other individuals and corporations. As a responsible member
018 * of the community, you are asked (but not required) to donate any
019 * enhancements or improvements back to the community under a similar open
020 * source license. Thank you. -TMN
021 */
022 package groovyx.net.http;
023
024 import groovy.lang.Closure;
025 import groovy.util.XmlSlurper;
026 import groovy.util.slurpersupport.GPathResult;
027 import groovyx.net.http.HTTPBuilder.SendDelegate;
028
029 import java.io.IOException;
030 import java.io.InputStream;
031 import java.io.InputStreamReader;
032 import java.io.Reader;
033 import java.io.UnsupportedEncodingException;
034 import java.nio.charset.Charset;
035 import java.util.HashMap;
036 import java.util.List;
037 import java.util.Map;
038
039 import javax.xml.parsers.ParserConfigurationException;
040
041 import net.sf.json.JSON;
042 import net.sf.json.groovy.JsonSlurper;
043
044 import org.apache.commons.logging.Log;
045 import org.apache.commons.logging.LogFactory;
046 import org.apache.http.HttpResponse;
047 import org.apache.http.NameValuePair;
048 import org.apache.http.client.utils.URLEncodedUtils;
049 import org.codehaus.groovy.runtime.DefaultGroovyMethods;
050 import org.codehaus.groovy.runtime.MethodClosure;
051 import org.cyberneko.html.parsers.SAXParser;
052 import org.xml.sax.SAXException;
053
054
055 /**
056 * <p>Keeps track of response parsers for each content type. Each parser
057 * should should be a closure that accepts an {@link HttpResponse} instance,
058 * and returns whatever handler is appropriate for reading the response
059 * data for that content-type. For example, a plain-text response should
060 * probably be parsed with a <code>Reader</code>, while an XML response
061 * might be parsed by an XmlSlurper, which would then be passed to the
062 * response closure. </p>
063 *
064 * <p>Note that all methods in this class assume {@link HttpResponse#getEntity()}
065 * return a non-null value. It is the job of the HTTPBuilder instance to ensure
066 * a NullPointerException is not thrown by passing a response that contains no
067 * entity.</p>
068 *
069 * @see ContentType
070 */
071 public class ParserRegistry {
072
073 protected Closure defaultParser = new MethodClosure( this, "parseStream" );
074 protected final Log log = LogFactory.getLog( getClass() );
075
076 /**
077 * Helper method to get the charset from the response. This should be done
078 * when manually parsing any text response to ensure it is decoded using the
079 * correct charset. For instance:<pre>
080 * Reader reader = new InputStreamReader( resp.getEntity().getContent(),
081 * ParserRegistry.getCharset( resp ) );</pre>
082 * @param resp
083 */
084 public static String getCharset( HttpResponse resp ) {
085 NameValuePair charset = resp.getEntity().getContentType()
086 .getElements()[0].getParameterByName("charset");
087 return ( charset == null || charset.getValue().trim().equals("") ) ?
088 Charset.defaultCharset().name() : charset.getValue();
089 }
090
091 /**
092 * Helper method to get the content-type string from the response
093 * (no charset).
094 * @param resp
095 */
096 public static String getContentType( HttpResponse resp ) {
097 /* TODO how do we handle a very rude server who does not return a
098 content-type header? It could cause an NPE here. and in getCharset */
099 return resp.getEntity().getContentType()
100 .getElements()[0].getName();
101 }
102
103 /**
104 * Default parser used for binary data.
105 * @param resp
106 * @return an InputStream
107 * @throws IllegalStateException
108 * @throws IOException
109 */
110 public InputStream parseStream( HttpResponse resp ) throws IOException {
111 return resp.getEntity().getContent();
112 }
113
114 /**
115 * Default parser used to handle plain text data. The response text
116 * is decoded using the charset passed in the response content-type
117 * header.
118 * @param resp
119 * @return
120 * @throws UnsupportedEncodingException
121 * @throws IllegalStateException
122 * @throws IOException
123 */
124 public Reader parseText( HttpResponse resp ) throws IOException {
125 return new InputStreamReader( resp.getEntity().getContent(),
126 ParserRegistry.getCharset( resp ) );
127 }
128
129 /**
130 * Default parser used to decode a URL-encoded response.
131 * @param resp
132 * @return
133 * @throws IOException
134 */
135 public Map<String,String> parseForm( HttpResponse resp ) throws IOException {
136 List<NameValuePair> params = URLEncodedUtils.parse( resp.getEntity() );
137 Map<String,String> paramMap = new HashMap<String,String>(params.size());
138 for ( NameValuePair param : params )
139 paramMap.put( param.getName(), param.getValue() );
140 return paramMap;
141 }
142
143 /**
144 * Parse an HTML document by passing it through the NekoHTML parser.
145 * @see SAXParser
146 * @see XmlSlurper#parse(Reader)
147 * @param resp HTTP response from which to parse content
148 * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)}
149 * @throws IOException
150 * @throws SAXException
151 */
152 public GPathResult parseHTML( HttpResponse resp ) throws IOException, SAXException {
153 return new XmlSlurper( new org.cyberneko.html.parsers.SAXParser() )
154 .parse( parseText( resp ) );
155 }
156
157 /**
158 * Default parser used to decode an XML response.
159 * @see XmlSlurper#parse(Reader)
160 * @param resp HTTP response from which to parse content
161 * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)}
162 * @throws IOException
163 * @throws SAXException
164 * @throws ParserConfigurationException
165 */
166 public GPathResult parseXML( HttpResponse resp ) throws IOException, SAXException, ParserConfigurationException {
167 return new XmlSlurper().parse( parseText( resp ) );
168 }
169
170 /**
171 * Default parser used to decode a JSON response.
172 * @param resp
173 * @return
174 * @throws IOException
175 */
176 public JSON parseJSON( HttpResponse resp ) throws IOException {
177 // there is a bug in the JsonSlurper.parse method...
178 String jsonTxt = DefaultGroovyMethods.getText( parseText( resp ) );
179 return new JsonSlurper().parseText( jsonTxt );
180 }
181
182 protected Map<String,Closure> registeredParsers = buildDefaultParserMap();
183
184 /**
185 * Register a new parser for the given content-type. The parser closure
186 * should accept an {@link HttpResponse} argument and return a type suitable
187 * to be passed to a {@link SendDelegate#getResponse() response handler}.
188 * The value returned from the parser closure is always the second parameter
189 * of the response handler closure.
190 * @param contentType <code>content-type</code> string
191 * @param closure code that will parse the HttpResponse and return parsed
192 * data to the response handler.
193 */
194 public void register( String contentType, Closure closure ) {
195 registeredParsers.put( contentType, closure );
196 }
197
198 /* Retrieve a parser for the given response content-type string. This
199 * should usually not be called by a user. The appropriate parser will
200 * be resolved prior to executing the response handler.
201 * @param contentType
202 * @return parser that can interpret the given response content type,
203 * or the default parser if no parser is registered for the given
204 * content-type. It should NOT return a null value.
205 */
206 Closure get( String contentType ) {
207 Closure parser = registeredParsers.get(contentType);
208 if ( parser == null ) {
209 log.warn( "Cannot find parser for content-type: " + contentType
210 + " -- using default parser.");
211 parser = defaultParser;
212 }
213 return parser;
214 }
215
216 /**
217 * Returns a map of default parsers. Override this method to change
218 * what parsers are registered by default. You can of course call
219 * <code>super.buildDefaultParserMap()</code> and then add or remove
220 * from that result as well.
221 */
222 protected Map<String,Closure> buildDefaultParserMap() {
223 Map<String,Closure> parsers = new HashMap<String,Closure>();
224
225 parsers.put( ContentType.BINARY.toString(), new MethodClosure( this, "parseStream" ) );
226 parsers.put( ContentType.TEXT.toString(), new MethodClosure(this,"parseText") );
227 parsers.put( ContentType.URLENC.toString(), new MethodClosure(this,"parseForm") );
228 parsers.put( ContentType.HTML.toString(), new MethodClosure(this,"parseHTML") );
229
230 Closure pClosure = new MethodClosure(this,"parseXML");
231 for ( String ct : ContentType.XML.getContentTypeStrings() )
232 parsers.put( ct, pClosure );
233
234 pClosure = new MethodClosure(this,"parseJSON");
235 for ( String ct : ContentType.JSON.getContentTypeStrings() )
236 parsers.put( ct, pClosure );
237
238 return parsers;
239 }
240 }