001 /* 002 * Copyright 2003-2008 the original author or authors. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * You are receiving this code free of charge, which represents many hours of 017 * effort from other individuals and corporations. As a responsible member 018 * of the community, you are asked (but not required) to donate any 019 * enhancements or improvements back to the community under a similar open 020 * source license. Thank you. -TMN 021 */ 022 package groovyx.net.http; 023 024 import groovy.lang.Closure; 025 import groovy.util.XmlSlurper; 026 import groovy.util.slurpersupport.GPathResult; 027 import groovyx.net.http.HTTPBuilder.SendDelegate; 028 029 import java.io.IOException; 030 import java.io.InputStream; 031 import java.io.InputStreamReader; 032 import java.io.Reader; 033 import java.io.UnsupportedEncodingException; 034 import java.nio.charset.Charset; 035 import java.util.HashMap; 036 import java.util.List; 037 import java.util.Map; 038 039 import javax.xml.parsers.ParserConfigurationException; 040 041 import net.sf.json.JSON; 042 import net.sf.json.groovy.JsonSlurper; 043 044 import org.apache.commons.logging.Log; 045 import org.apache.commons.logging.LogFactory; 046 import org.apache.http.HttpResponse; 047 import org.apache.http.NameValuePair; 048 import org.apache.http.client.utils.URLEncodedUtils; 049 import org.codehaus.groovy.runtime.DefaultGroovyMethods; 050 import org.codehaus.groovy.runtime.MethodClosure; 051 import org.cyberneko.html.parsers.SAXParser; 052 import org.xml.sax.SAXException; 053 054 055 /** 056 * <p>Keeps track of response parsers for each content type. Each parser 057 * should should be a closure that accepts an {@link HttpResponse} instance, 058 * and returns whatever handler is appropriate for reading the response 059 * data for that content-type. For example, a plain-text response should 060 * probably be parsed with a <code>Reader</code>, while an XML response 061 * might be parsed by an XmlSlurper, which would then be passed to the 062 * response closure. </p> 063 * 064 * <p>Note that all methods in this class assume {@link HttpResponse#getEntity()} 065 * return a non-null value. It is the job of the HTTPBuilder instance to ensure 066 * a NullPointerException is not thrown by passing a response that contains no 067 * entity.</p> 068 * 069 * @see ContentType 070 */ 071 public class ParserRegistry { 072 073 protected Closure defaultParser = new MethodClosure( this, "parseStream" ); 074 protected final Log log = LogFactory.getLog( getClass() ); 075 076 /** 077 * Helper method to get the charset from the response. This should be done 078 * when manually parsing any text response to ensure it is decoded using the 079 * correct charset. For instance:<pre> 080 * Reader reader = new InputStreamReader( resp.getEntity().getContent(), 081 * ParserRegistry.getCharset( resp ) );</pre> 082 * @param resp 083 */ 084 public static String getCharset( HttpResponse resp ) { 085 NameValuePair charset = resp.getEntity().getContentType() 086 .getElements()[0].getParameterByName("charset"); 087 return ( charset == null || charset.getValue().trim().equals("") ) ? 088 Charset.defaultCharset().name() : charset.getValue(); 089 } 090 091 /** 092 * Helper method to get the content-type string from the response 093 * (no charset). 094 * @param resp 095 */ 096 public static String getContentType( HttpResponse resp ) { 097 /* TODO how do we handle a very rude server who does not return a 098 content-type header? It could cause an NPE here. and in getCharset */ 099 return resp.getEntity().getContentType() 100 .getElements()[0].getName(); 101 } 102 103 /** 104 * Default parser used for binary data. 105 * @see ContentType#BINARY 106 * @param resp 107 * @return an InputStream 108 * @throws IllegalStateException 109 * @throws IOException 110 */ 111 public InputStream parseStream( HttpResponse resp ) throws IOException { 112 return resp.getEntity().getContent(); 113 } 114 115 /** 116 * Default parser used to handle plain text data. The response text 117 * is decoded using the charset passed in the response content-type 118 * header. 119 * @see ContentType#TEXT 120 * @param resp 121 * @return 122 * @throws UnsupportedEncodingException 123 * @throws IllegalStateException 124 * @throws IOException 125 */ 126 public Reader parseText( HttpResponse resp ) throws IOException { 127 return new InputStreamReader( resp.getEntity().getContent(), 128 ParserRegistry.getCharset( resp ) ); 129 } 130 131 /** 132 * Default parser used to decode a URL-encoded response. 133 * @see ContentType#URLENC 134 * @param resp 135 * @return 136 * @throws IOException 137 */ 138 public Map<String,String> parseForm( HttpResponse resp ) throws IOException { 139 List<NameValuePair> params = URLEncodedUtils.parse( resp.getEntity() ); 140 Map<String,String> paramMap = new HashMap<String,String>(params.size()); 141 for ( NameValuePair param : params ) 142 paramMap.put( param.getName(), param.getValue() ); 143 return paramMap; 144 } 145 146 /** 147 * Parse an HTML document by passing it through the NekoHTML parser. 148 * @see ContentType#HTML 149 * @see SAXParser 150 * @see XmlSlurper#parse(Reader) 151 * @param resp HTTP response from which to parse content 152 * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)} 153 * @throws IOException 154 * @throws SAXException 155 */ 156 public GPathResult parseHTML( HttpResponse resp ) throws IOException, SAXException { 157 return new XmlSlurper( new org.cyberneko.html.parsers.SAXParser() ) 158 .parse( parseText( resp ) ); 159 } 160 161 /** 162 * Default parser used to decode an XML response. 163 * @see ContentType#XML 164 * @see XmlSlurper#parse(Reader) 165 * @param resp HTTP response from which to parse content 166 * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)} 167 * @throws IOException 168 * @throws SAXException 169 * @throws ParserConfigurationException 170 */ 171 public GPathResult parseXML( HttpResponse resp ) throws IOException, SAXException, ParserConfigurationException { 172 return new XmlSlurper().parse( parseText( resp ) ); 173 } 174 175 /** 176 * Default parser used to decode a JSON response. 177 * @see ContentType#JSON 178 * @param resp 179 * @return 180 * @throws IOException 181 */ 182 public JSON parseJSON( HttpResponse resp ) throws IOException { 183 // there is a bug in the JsonSlurper.parse method... 184 String jsonTxt = DefaultGroovyMethods.getText( parseText( resp ) ); 185 return new JsonSlurper().parseText( jsonTxt ); 186 } 187 188 protected Map<String,Closure> registeredParsers = buildDefaultParserMap(); 189 190 /** 191 * Register a new parser for the given content-type. The parser closure 192 * should accept an {@link HttpResponse} argument and return a type suitable 193 * to be passed to a {@link SendDelegate#getResponse() response handler}. 194 * The value returned from the parser closure is always the second parameter 195 * of the response handler closure. 196 * @param contentType <code>content-type</code> string 197 * @param closure code that will parse the HttpResponse and return parsed 198 * data to the response handler. 199 */ 200 public void register( String contentType, Closure closure ) { 201 registeredParsers.put( contentType, closure ); 202 } 203 204 /* Retrieve a parser for the given response content-type string. This 205 * should usually not be called by a user. The appropriate parser will 206 * be resolved prior to executing the response handler. 207 * @param contentType 208 * @return parser that can interpret the given response content type, 209 * or the default parser if no parser is registered for the given 210 * content-type. It should NOT return a null value. 211 */ 212 Closure get( String contentType ) { 213 Closure parser = registeredParsers.get(contentType); 214 if ( parser == null ) { 215 log.warn( "Cannot find parser for content-type: " + contentType 216 + " -- using default parser."); 217 parser = defaultParser; 218 } 219 return parser; 220 } 221 222 /** 223 * Returns a map of default parsers. Override this method to change 224 * what parsers are registered by default. You can of course call 225 * <code>super.buildDefaultParserMap()</code> and then add or remove 226 * from that result as well. 227 */ 228 protected Map<String,Closure> buildDefaultParserMap() { 229 Map<String,Closure> parsers = new HashMap<String,Closure>(); 230 231 parsers.put( ContentType.BINARY.toString(), new MethodClosure( this, "parseStream" ) ); 232 parsers.put( ContentType.TEXT.toString(), new MethodClosure(this,"parseText") ); 233 parsers.put( ContentType.URLENC.toString(), new MethodClosure(this,"parseForm") ); 234 parsers.put( ContentType.HTML.toString(), new MethodClosure(this,"parseHTML") ); 235 236 Closure pClosure = new MethodClosure(this,"parseXML"); 237 for ( String ct : ContentType.XML.getContentTypeStrings() ) 238 parsers.put( ct, pClosure ); 239 240 pClosure = new MethodClosure(this,"parseJSON"); 241 for ( String ct : ContentType.JSON.getContentTypeStrings() ) 242 parsers.put( ct, pClosure ); 243 244 return parsers; 245 } 246 }