001 /* 002 * Copyright 2003-2008 the original author or authors. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * You are receiving this code free of charge, which represents many hours of 017 * effort from other individuals and corporations. As a responsible member 018 * of the community, you are asked (but not required) to donate any 019 * enhancements or improvements back to the community under a similar open 020 * source license. Thank you. -TMN 021 */ 022 package groovyx.net.http; 023 024 import groovy.lang.Closure; 025 import groovy.util.XmlSlurper; 026 import groovy.util.slurpersupport.GPathResult; 027 import groovyx.net.http.HTTPBuilder.SendDelegate; 028 029 import java.io.IOException; 030 import java.io.InputStream; 031 import java.io.InputStreamReader; 032 import java.io.Reader; 033 import java.io.UnsupportedEncodingException; 034 import java.nio.charset.Charset; 035 import java.util.HashMap; 036 import java.util.List; 037 import java.util.Map; 038 039 import javax.xml.parsers.ParserConfigurationException; 040 041 import net.sf.json.JSON; 042 import net.sf.json.groovy.JsonSlurper; 043 044 import org.apache.commons.logging.Log; 045 import org.apache.commons.logging.LogFactory; 046 import org.apache.http.HttpResponse; 047 import org.apache.http.NameValuePair; 048 import org.apache.http.client.utils.URLEncodedUtils; 049 import org.codehaus.groovy.runtime.DefaultGroovyMethods; 050 import org.codehaus.groovy.runtime.MethodClosure; 051 import org.cyberneko.html.parsers.SAXParser; 052 import org.xml.sax.SAXException; 053 054 055 /** 056 * <p>Keeps track of response parsers for each content type. Each parser 057 * should should be a closure that accepts an {@link HttpResponse} instance, 058 * and returns whatever handler is appropriate for reading the response 059 * data for that content-type. For example, a plain-text response should 060 * probably be parsed with a <code>Reader</code>, while an XML response 061 * might be parsed by an XmlSlurper, which would then be passed to the 062 * response closure. </p> 063 * 064 * <p>Note that all methods in this class assume {@link HttpResponse#getEntity()} 065 * return a non-null value. It is the job of the HTTPBuilder instance to ensure 066 * a NullPointerException is not thrown by passing a response that contains no 067 * entity.</p> 068 * 069 * @see ContentType 070 */ 071 public class ParserRegistry { 072 073 protected Closure defaultParser = new MethodClosure( this, "parseStream" ); 074 protected final Log log = LogFactory.getLog( getClass() ); 075 076 /** 077 * Helper method to get the charset from the response. This should be done 078 * when manually parsing any text response to ensure it is decoded using the 079 * correct charset. For instance:<pre> 080 * Reader reader = new InputStreamReader( resp.getEntity().getContent(), 081 * ParserRegistry.getCharset( resp ) );</pre> 082 * @param resp 083 */ 084 public static String getCharset( HttpResponse resp ) { 085 NameValuePair charset = resp.getEntity().getContentType() 086 .getElements()[0].getParameterByName("charset"); 087 return ( charset == null || charset.getValue().trim().equals("") ) ? 088 Charset.defaultCharset().name() : charset.getValue(); 089 } 090 091 /** 092 * Helper method to get the content-type string from the response 093 * (no charset). 094 * @param resp 095 */ 096 public static String getContentType( HttpResponse resp ) { 097 /* TODO how do we handle a very rude server who does not return a 098 content-type header? It could cause an NPE here. and in getCharset */ 099 return resp.getEntity().getContentType() 100 .getElements()[0].getName(); 101 } 102 103 /** 104 * Default parser used for binary data. 105 * @param resp 106 * @return an InputStream 107 * @throws IllegalStateException 108 * @throws IOException 109 */ 110 public InputStream parseStream( HttpResponse resp ) throws IOException { 111 return resp.getEntity().getContent(); 112 } 113 114 /** 115 * Default parser used to handle plain text data. The response text 116 * is decoded using the charset passed in the response content-type 117 * header. 118 * @param resp 119 * @return 120 * @throws UnsupportedEncodingException 121 * @throws IllegalStateException 122 * @throws IOException 123 */ 124 public Reader parseText( HttpResponse resp ) throws IOException { 125 return new InputStreamReader( resp.getEntity().getContent(), 126 ParserRegistry.getCharset( resp ) ); 127 } 128 129 /** 130 * Default parser used to decode a URL-encoded response. 131 * @param resp 132 * @return 133 * @throws IOException 134 */ 135 public Map<String,String> parseForm( HttpResponse resp ) throws IOException { 136 List<NameValuePair> params = URLEncodedUtils.parse( resp.getEntity() ); 137 Map<String,String> paramMap = new HashMap<String,String>(params.size()); 138 for ( NameValuePair param : params ) 139 paramMap.put( param.getName(), param.getValue() ); 140 return paramMap; 141 } 142 143 /** 144 * Parse an HTML document by passing it through the NekoHTML parser. 145 * @see SAXParser 146 * @see XmlSlurper#parse(Reader) 147 * @param resp HTTP response from which to parse content 148 * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)} 149 * @throws IOException 150 * @throws SAXException 151 */ 152 public GPathResult parseHTML( HttpResponse resp ) throws IOException, SAXException { 153 return new XmlSlurper( new org.cyberneko.html.parsers.SAXParser() ) 154 .parse( parseText( resp ) ); 155 } 156 157 /** 158 * Default parser used to decode an XML response. 159 * @see XmlSlurper#parse(Reader) 160 * @param resp HTTP response from which to parse content 161 * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)} 162 * @throws IOException 163 * @throws SAXException 164 * @throws ParserConfigurationException 165 */ 166 public GPathResult parseXML( HttpResponse resp ) throws IOException, SAXException, ParserConfigurationException { 167 return new XmlSlurper().parse( parseText( resp ) ); 168 } 169 170 /** 171 * Default parser used to decode a JSON response. 172 * @param resp 173 * @return 174 * @throws IOException 175 */ 176 public JSON parseJSON( HttpResponse resp ) throws IOException { 177 // there is a bug in the JsonSlurper.parse method... 178 String jsonTxt = DefaultGroovyMethods.getText( parseText( resp ) ); 179 return new JsonSlurper().parseText( jsonTxt ); 180 } 181 182 protected Map<String,Closure> registeredParsers = buildDefaultParserMap(); 183 184 /** 185 * Register a new parser for the given content-type. The parser closure 186 * should accept an {@link HttpResponse} argument and return a type suitable 187 * to be passed to a {@link SendDelegate#getResponse() response handler}. 188 * The value returned from the parser closure is always the second parameter 189 * of the response handler closure. 190 * @param contentType <code>content-type</code> string 191 * @param closure code that will parse the HttpResponse and return parsed 192 * data to the response handler. 193 */ 194 public void register( String contentType, Closure closure ) { 195 registeredParsers.put( contentType, closure ); 196 } 197 198 /* Retrieve a parser for the given response content-type string. This 199 * should usually not be called by a user. The appropriate parser will 200 * be resolved prior to executing the response handler. 201 * @param contentType 202 * @return parser that can interpret the given response content type, 203 * or the default parser if no parser is registered for the given 204 * content-type. It should NOT return a null value. 205 */ 206 Closure get( String contentType ) { 207 Closure parser = registeredParsers.get(contentType); 208 if ( parser == null ) { 209 log.warn( "Cannot find parser for content-type: " + contentType 210 + " -- using default parser."); 211 parser = defaultParser; 212 } 213 return parser; 214 } 215 216 /** 217 * Returns a map of default parsers. Override this method to change 218 * what parsers are registered by default. You can of course call 219 * <code>super.buildDefaultParserMap()</code> and then add or remove 220 * from that result as well. 221 */ 222 protected Map<String,Closure> buildDefaultParserMap() { 223 Map<String,Closure> parsers = new HashMap<String,Closure>(); 224 225 parsers.put( ContentType.BINARY.toString(), new MethodClosure( this, "parseStream" ) ); 226 parsers.put( ContentType.TEXT.toString(), new MethodClosure(this,"parseText") ); 227 parsers.put( ContentType.URLENC.toString(), new MethodClosure(this,"parseForm") ); 228 parsers.put( ContentType.HTML.toString(), new MethodClosure(this,"parseHTML") ); 229 230 Closure pClosure = new MethodClosure(this,"parseXML"); 231 for ( String ct : ContentType.XML.getContentTypeStrings() ) 232 parsers.put( ct, pClosure ); 233 234 pClosure = new MethodClosure(this,"parseJSON"); 235 for ( String ct : ContentType.JSON.getContentTypeStrings() ) 236 parsers.put( ct, pClosure ); 237 238 return parsers; 239 } 240 }