View Javadoc

1   /*
2    * Copyright 2003-2008 the original author or authors.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * You are receiving this code free of charge, which represents many hours of
17   * effort from other individuals and corporations.  As a responsible member 
18   * of the community, you are asked (but not required) to donate any 
19   * enhancements or improvements back to the community under a similar open 
20   * source license.  Thank you. -TMN
21   */
22  package groovyx.net.http;
23  
24  import groovy.lang.Closure;
25  import groovy.util.XmlSlurper;
26  import groovy.util.slurpersupport.GPathResult;
27  import groovyx.net.http.HTTPBuilder.SendDelegate;
28  
29  import java.io.IOException;
30  import java.io.InputStream;
31  import java.io.InputStreamReader;
32  import java.io.Reader;
33  import java.io.UnsupportedEncodingException;
34  import java.nio.charset.Charset;
35  import java.util.HashMap;
36  import java.util.List;
37  import java.util.Map;
38  
39  import javax.xml.parsers.ParserConfigurationException;
40  
41  import net.sf.json.JSON;
42  import net.sf.json.groovy.JsonSlurper;
43  
44  import org.apache.commons.logging.Log;
45  import org.apache.commons.logging.LogFactory;
46  import org.apache.http.HttpResponse;
47  import org.apache.http.NameValuePair;
48  import org.apache.http.client.utils.URLEncodedUtils;
49  import org.codehaus.groovy.runtime.DefaultGroovyMethods;
50  import org.codehaus.groovy.runtime.MethodClosure;
51  import org.cyberneko.html.parsers.SAXParser;
52  import org.xml.sax.SAXException;
53  
54  
55  /**
56   * <p>Keeps track of response parsers for each content type.  Each parser 
57   * should should be a closure that accepts an {@link HttpResponse} instance,
58   * and returns whatever handler is appropriate for reading the response 
59   * data for that content-type.  For example, a plain-text response should 
60   * probably be parsed with a <code>Reader</code>, while an XML response 
61   * might be parsed by an XmlSlurper, which would then be passed to the 
62   * response closure. </p>
63   * 
64   * <p>Note that all methods in this class assume {@link HttpResponse#getEntity()}
65   * return a non-null value.  It is the job of the HTTPBuilder instance to ensure
66   * a NullPointerException is not thrown by passing a response that contains no
67   * entity.</p>
68   * 
69   * @see ContentType
70   */
71  public class ParserRegistry {
72  	
73  	protected Closure defaultParser = new MethodClosure( this, "parseStream" );
74  	protected final Log log = LogFactory.getLog( getClass() );
75  	
76  	/**
77  	 * Helper method to get the charset from the response.  This should be done 
78  	 * when manually parsing any text response to ensure it is decoded using the
79  	 * correct charset. For instance:<pre>
80  	 * Reader reader = new InputStreamReader( resp.getEntity().getContent(), 
81  	 *   ParserRegistry.getCharset( resp ) );</pre>
82  	 * @param resp
83  	 */
84  	public static String getCharset( HttpResponse resp ) {
85  		NameValuePair charset = resp.getEntity().getContentType()
86  				.getElements()[0].getParameterByName("charset"); 
87  		return ( charset == null || charset.getValue().trim().equals("") ) ?
88  			Charset.defaultCharset().name() : charset.getValue();
89  	}
90  	
91  	/**
92  	 * Helper method to get the content-type string from the response 
93  	 * (no charset).
94  	 * @param resp
95  	 */
96  	public static String getContentType( HttpResponse resp ) {
97  		/* TODO how do we handle a very rude server who does not return a 
98  		   content-type header?  It could cause an NPE here. and in getCharset */
99  		return resp.getEntity().getContentType()
100 			.getElements()[0].getName();
101 	}
102 	
103 	/**
104 	 * Default parser used for binary data.
105 	 * @param resp
106 	 * @return an InputStream 
107 	 * @throws IllegalStateException
108 	 * @throws IOException
109 	 */
110 	public InputStream parseStream( HttpResponse resp ) throws IOException {
111 		return resp.getEntity().getContent();
112 	}
113 	
114 	/**
115 	 * Default parser used to handle plain text data.  The response text 
116 	 * is decoded using the charset passed in the response content-type 
117 	 * header. 
118 	 * @param resp
119 	 * @return
120 	 * @throws UnsupportedEncodingException
121 	 * @throws IllegalStateException
122 	 * @throws IOException
123 	 */
124 	public Reader parseText( HttpResponse resp ) throws IOException {
125 		return new InputStreamReader( resp.getEntity().getContent(), 
126 				ParserRegistry.getCharset( resp ) );
127 	}
128 	
129 	/**
130 	 * Default parser used to decode a URL-encoded response.
131 	 * @param resp
132 	 * @return
133 	 * @throws IOException
134 	 */
135 	public Map<String,String> parseForm( HttpResponse resp ) throws IOException {
136 		List<NameValuePair> params = URLEncodedUtils.parse( resp.getEntity() );
137 		Map<String,String> paramMap = new HashMap<String,String>(params.size());
138 		for ( NameValuePair param : params ) 
139 			paramMap.put( param.getName(), param.getValue() );
140 		return paramMap;
141 	}
142 	
143 	/**
144 	 * Parse an HTML document by passing it through the NekoHTML parser.
145 	 * @see SAXParser
146 	 * @see XmlSlurper#parse(Reader)
147 	 * @param resp HTTP response from which to parse content
148 	 * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)}
149 	 * @throws IOException
150 	 * @throws SAXException
151 	 */
152 	public GPathResult parseHTML( HttpResponse resp ) throws IOException, SAXException {
153 		return new XmlSlurper( new org.cyberneko.html.parsers.SAXParser() )
154 			.parse( parseText( resp ) );
155 	}
156 	
157 	/**
158 	 * Default parser used to decode an XML response.  
159 	 * @see XmlSlurper#parse(Reader)
160 	 * @param resp HTTP response from which to parse content
161 	 * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)}
162 	 * @throws IOException
163 	 * @throws SAXException
164 	 * @throws ParserConfigurationException
165 	 */
166 	public GPathResult parseXML( HttpResponse resp ) throws IOException, SAXException, ParserConfigurationException {
167 		return new XmlSlurper().parse( parseText( resp ) );
168 	}
169 	
170 	/**
171 	 * Default parser used to decode a JSON response.
172 	 * @param resp
173 	 * @return
174 	 * @throws IOException
175 	 */
176 	public JSON parseJSON( HttpResponse resp ) throws IOException {
177 		// there is a bug in the JsonSlurper.parse method...
178 		String jsonTxt = DefaultGroovyMethods.getText( parseText( resp ) );			
179 		return new JsonSlurper().parseText( jsonTxt );
180 	}
181 	
182 	protected Map<String,Closure> registeredParsers = buildDefaultParserMap();
183 	
184 	/**
185 	 * Register a new parser for the given content-type.  The parser closure
186 	 * should accept an {@link HttpResponse} argument and return a type suitable
187 	 * to be passed to a {@link SendDelegate#getResponse() response handler}.
188 	 * The value returned from the parser closure is always the second parameter 
189 	 * of the response handler closure.  
190 	 * @param contentType  <code>content-type</code> string
191 	 * @param closure code that will parse the HttpResponse and return parsed 
192 	 *   data to the response handler. 
193 	 */
194 	public void register( String contentType, Closure closure ) {
195 		registeredParsers.put( contentType, closure );
196 	}
197 	
198 	/* Retrieve a parser for the given response content-type string.  This
199 	 * should usually not be called by a user.  The appropriate parser will
200 	 * be resolved prior to executing the response handler. 
201 	 * @param contentType
202 	 * @return parser that can interpret the given response content type,
203 	 *   or the default parser if no parser is registered for the given 
204 	 *   content-type.  It should NOT return a null value.
205 	 */
206 	Closure get( String contentType ) { 
207 		Closure parser = registeredParsers.get(contentType);
208 		if ( parser == null ) {
209 			log.warn( "Cannot find parser for content-type: " + contentType 
210 					+ " -- using default parser.");
211 			parser = defaultParser;
212 		}
213 		return parser;
214 	}
215 	
216 	/**
217 	 * Returns a map of default parsers.  Override this method to change 
218 	 * what parsers are registered by default.  You can of course call
219 	 * <code>super.buildDefaultParserMap()</code> and then add or remove 
220 	 * from that result as well.
221 	 */
222 	protected Map<String,Closure> buildDefaultParserMap() {
223 		Map<String,Closure> parsers = new HashMap<String,Closure>();
224 		
225 		parsers.put( ContentType.BINARY.toString(), new MethodClosure( this, "parseStream" ) );
226 		parsers.put( ContentType.TEXT.toString(), new MethodClosure(this,"parseText") );
227 		parsers.put( ContentType.URLENC.toString(), new MethodClosure(this,"parseForm") );
228 		parsers.put( ContentType.HTML.toString(), new MethodClosure(this,"parseHTML") );
229 		
230 		Closure pClosure = new MethodClosure(this,"parseXML");
231 		for ( String ct : ContentType.XML.getContentTypeStrings() )
232 			parsers.put( ct, pClosure );
233 		
234 		pClosure = new MethodClosure(this,"parseJSON");
235 		for ( String ct : ContentType.JSON.getContentTypeStrings() )
236 			parsers.put( ct, pClosure );
237 		
238 		return parsers;
239 	}
240 }