1 /*
2 * Copyright 2003-2008 the original author or authors.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * You are receiving this code free of charge, which represents many hours of
17 * effort from other individuals and corporations. As a responsible member
18 * of the community, you are asked (but not required) to donate any
19 * enhancements or improvements back to the community under a similar open
20 * source license. Thank you. -TMN
21 */
22 package groovyx.net.http;
23
24 import groovy.lang.Closure;
25 import groovy.util.XmlSlurper;
26 import groovy.util.slurpersupport.GPathResult;
27 import groovyx.net.http.HTTPBuilder.SendDelegate;
28
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.io.InputStreamReader;
32 import java.io.Reader;
33 import java.io.UnsupportedEncodingException;
34 import java.nio.charset.Charset;
35 import java.util.HashMap;
36 import java.util.List;
37 import java.util.Map;
38
39 import javax.xml.parsers.ParserConfigurationException;
40
41 import net.sf.json.JSON;
42 import net.sf.json.groovy.JsonSlurper;
43
44 import org.apache.commons.logging.Log;
45 import org.apache.commons.logging.LogFactory;
46 import org.apache.http.HttpResponse;
47 import org.apache.http.NameValuePair;
48 import org.apache.http.client.utils.URLEncodedUtils;
49 import org.codehaus.groovy.runtime.DefaultGroovyMethods;
50 import org.codehaus.groovy.runtime.MethodClosure;
51 import org.cyberneko.html.parsers.SAXParser;
52 import org.xml.sax.SAXException;
53
54
55 /**
56 * <p>Keeps track of response parsers for each content type. Each parser
57 * should should be a closure that accepts an {@link HttpResponse} instance,
58 * and returns whatever handler is appropriate for reading the response
59 * data for that content-type. For example, a plain-text response should
60 * probably be parsed with a <code>Reader</code>, while an XML response
61 * might be parsed by an XmlSlurper, which would then be passed to the
62 * response closure. </p>
63 *
64 * <p>Note that all methods in this class assume {@link HttpResponse#getEntity()}
65 * return a non-null value. It is the job of the HTTPBuilder instance to ensure
66 * a NullPointerException is not thrown by passing a response that contains no
67 * entity.</p>
68 *
69 * @see ContentType
70 */
71 public class ParserRegistry {
72
73 protected Closure defaultParser = new MethodClosure( this, "parseStream" );
74 protected final Log log = LogFactory.getLog( getClass() );
75
76 /**
77 * Helper method to get the charset from the response. This should be done
78 * when manually parsing any text response to ensure it is decoded using the
79 * correct charset. For instance:<pre>
80 * Reader reader = new InputStreamReader( resp.getEntity().getContent(),
81 * ParserRegistry.getCharset( resp ) );</pre>
82 * @param resp
83 */
84 public static String getCharset( HttpResponse resp ) {
85 NameValuePair charset = resp.getEntity().getContentType()
86 .getElements()[0].getParameterByName("charset");
87 return ( charset == null || charset.getValue().trim().equals("") ) ?
88 Charset.defaultCharset().name() : charset.getValue();
89 }
90
91 /**
92 * Helper method to get the content-type string from the response
93 * (no charset).
94 * @param resp
95 */
96 public static String getContentType( HttpResponse resp ) {
97 /* TODO how do we handle a very rude server who does not return a
98 content-type header? It could cause an NPE here. and in getCharset */
99 return resp.getEntity().getContentType()
100 .getElements()[0].getName();
101 }
102
103 /**
104 * Default parser used for binary data.
105 * @param resp
106 * @return an InputStream
107 * @throws IllegalStateException
108 * @throws IOException
109 */
110 public InputStream parseStream( HttpResponse resp ) throws IOException {
111 return resp.getEntity().getContent();
112 }
113
114 /**
115 * Default parser used to handle plain text data. The response text
116 * is decoded using the charset passed in the response content-type
117 * header.
118 * @param resp
119 * @return
120 * @throws UnsupportedEncodingException
121 * @throws IllegalStateException
122 * @throws IOException
123 */
124 public Reader parseText( HttpResponse resp ) throws IOException {
125 return new InputStreamReader( resp.getEntity().getContent(),
126 ParserRegistry.getCharset( resp ) );
127 }
128
129 /**
130 * Default parser used to decode a URL-encoded response.
131 * @param resp
132 * @return
133 * @throws IOException
134 */
135 public Map<String,String> parseForm( HttpResponse resp ) throws IOException {
136 List<NameValuePair> params = URLEncodedUtils.parse( resp.getEntity() );
137 Map<String,String> paramMap = new HashMap<String,String>(params.size());
138 for ( NameValuePair param : params )
139 paramMap.put( param.getName(), param.getValue() );
140 return paramMap;
141 }
142
143 /**
144 * Parse an HTML document by passing it through the NekoHTML parser.
145 * @see SAXParser
146 * @see XmlSlurper#parse(Reader)
147 * @param resp HTTP response from which to parse content
148 * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)}
149 * @throws IOException
150 * @throws SAXException
151 */
152 public GPathResult parseHTML( HttpResponse resp ) throws IOException, SAXException {
153 return new XmlSlurper( new org.cyberneko.html.parsers.SAXParser() )
154 .parse( parseText( resp ) );
155 }
156
157 /**
158 * Default parser used to decode an XML response.
159 * @see XmlSlurper#parse(Reader)
160 * @param resp HTTP response from which to parse content
161 * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)}
162 * @throws IOException
163 * @throws SAXException
164 * @throws ParserConfigurationException
165 */
166 public GPathResult parseXML( HttpResponse resp ) throws IOException, SAXException, ParserConfigurationException {
167 return new XmlSlurper().parse( parseText( resp ) );
168 }
169
170 /**
171 * Default parser used to decode a JSON response.
172 * @param resp
173 * @return
174 * @throws IOException
175 */
176 public JSON parseJSON( HttpResponse resp ) throws IOException {
177 // there is a bug in the JsonSlurper.parse method...
178 String jsonTxt = DefaultGroovyMethods.getText( parseText( resp ) );
179 return new JsonSlurper().parseText( jsonTxt );
180 }
181
182 protected Map<String,Closure> registeredParsers = buildDefaultParserMap();
183
184 /**
185 * Register a new parser for the given content-type. The parser closure
186 * should accept an {@link HttpResponse} argument and return a type suitable
187 * to be passed to a {@link SendDelegate#getResponse() response handler}.
188 * The value returned from the parser closure is always the second parameter
189 * of the response handler closure.
190 * @param contentType <code>content-type</code> string
191 * @param closure code that will parse the HttpResponse and return parsed
192 * data to the response handler.
193 */
194 public void register( String contentType, Closure closure ) {
195 registeredParsers.put( contentType, closure );
196 }
197
198 /* Retrieve a parser for the given response content-type string. This
199 * should usually not be called by a user. The appropriate parser will
200 * be resolved prior to executing the response handler.
201 * @param contentType
202 * @return parser that can interpret the given response content type,
203 * or the default parser if no parser is registered for the given
204 * content-type. It should NOT return a null value.
205 */
206 Closure get( String contentType ) {
207 Closure parser = registeredParsers.get(contentType);
208 if ( parser == null ) {
209 log.warn( "Cannot find parser for content-type: " + contentType
210 + " -- using default parser.");
211 parser = defaultParser;
212 }
213 return parser;
214 }
215
216 /**
217 * Returns a map of default parsers. Override this method to change
218 * what parsers are registered by default. You can of course call
219 * <code>super.buildDefaultParserMap()</code> and then add or remove
220 * from that result as well.
221 */
222 protected Map<String,Closure> buildDefaultParserMap() {
223 Map<String,Closure> parsers = new HashMap<String,Closure>();
224
225 parsers.put( ContentType.BINARY.toString(), new MethodClosure( this, "parseStream" ) );
226 parsers.put( ContentType.TEXT.toString(), new MethodClosure(this,"parseText") );
227 parsers.put( ContentType.URLENC.toString(), new MethodClosure(this,"parseForm") );
228 parsers.put( ContentType.HTML.toString(), new MethodClosure(this,"parseHTML") );
229
230 Closure pClosure = new MethodClosure(this,"parseXML");
231 for ( String ct : ContentType.XML.getContentTypeStrings() )
232 parsers.put( ct, pClosure );
233
234 pClosure = new MethodClosure(this,"parseJSON");
235 for ( String ct : ContentType.JSON.getContentTypeStrings() )
236 parsers.put( ct, pClosure );
237
238 return parsers;
239 }
240 }