/*
 * Copyright 2003-2008 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * You are receiving this code free of charge, which represents many hours of
 * effort from other individuals and corporations.  As a responsible member
 * of the community, you are asked (but not required) to donate any
 * enhancements or improvements back to the community under a similar open
 * source license.  Thank you. -TMN
 */
package groovyx.net.http;

import groovy.lang.Closure;
import groovy.util.XmlSlurper;
import groovy.util.slurpersupport.GPathResult;
import groovyx.net.http.HTTPBuilder.SendDelegate;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;

import net.sf.json.JSON;
import net.sf.json.groovy.JsonSlurper;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.codehaus.groovy.runtime.DefaultGroovyMethods;
import org.codehaus.groovy.runtime.MethodClosure;
import org.cyberneko.html.parsers.SAXParser;
import org.xml.sax.SAXException;

055    /**
056     * <p>Keeps track of response parsers for each content type.  Each parser 
057     * should should be a closure that accepts an {@link HttpResponse} instance,
058     * and returns whatever handler is appropriate for reading the response 
059     * data for that content-type.  For example, a plain-text response should 
060     * probably be parsed with a <code>Reader</code>, while an XML response 
061     * might be parsed by an XmlSlurper, which would then be passed to the 
062     * response closure. </p>
063     * 
064     * <p>Note that all methods in this class assume {@link HttpResponse#getEntity()}
065     * return a non-null value.  It is the job of the HTTPBuilder instance to ensure
066     * a NullPointerException is not thrown by passing a response that contains no
067     * entity.</p>
068     * 
069     * @see ContentType
070     */
071    public class ParserRegistry {
072            
073            protected Closure defaultParser = new MethodClosure( this, "parseStream" );
074            protected final Log log = LogFactory.getLog( getClass() );
075            
076            /**
077             * Helper method to get the charset from the response.  This should be done 
078             * when manually parsing any text response to ensure it is decoded using the
079             * correct charset. For instance:<pre>
080             * Reader reader = new InputStreamReader( resp.getEntity().getContent(), 
081             *   ParserRegistry.getCharset( resp ) );</pre>
082             * @param resp
083             */
084            public static String getCharset( HttpResponse resp ) {
085                    NameValuePair charset = resp.getEntity().getContentType()
086                                    .getElements()[0].getParameterByName("charset"); 
087                    return ( charset == null || charset.getValue().trim().equals("") ) ?
088                            Charset.defaultCharset().name() : charset.getValue();
089            }
090            
091            /**
092             * Helper method to get the content-type string from the response 
093             * (no charset).
094             * @param resp
095             */
096            public static String getContentType( HttpResponse resp ) {
097                    /* TODO how do we handle a very rude server who does not return a 
098                       content-type header?  It could cause an NPE here. and in getCharset */
099                    return resp.getEntity().getContentType()
100                            .getElements()[0].getName();
101            }
102            
103            /**
104             * Default parser used for binary data.
105             * @param resp
106             * @return an InputStream 
107             * @throws IllegalStateException
108             * @throws IOException
109             */
110            public InputStream parseStream( HttpResponse resp ) throws IOException {
111                    return resp.getEntity().getContent();
112            }
113            
114            /**
115             * Default parser used to handle plain text data.  The response text 
116             * is decoded using the charset passed in the response content-type 
117             * header. 
118             * @param resp
119             * @return
120             * @throws UnsupportedEncodingException
121             * @throws IllegalStateException
122             * @throws IOException
123             */
124            public Reader parseText( HttpResponse resp ) throws IOException {
125                    return new InputStreamReader( resp.getEntity().getContent(), 
126                                    ParserRegistry.getCharset( resp ) );
127            }
128            
129            /**
130             * Default parser used to decode a URL-encoded response.
131             * @param resp
132             * @return
133             * @throws IOException
134             */
135            public Map<String,String> parseForm( HttpResponse resp ) throws IOException {
136                    List<NameValuePair> params = URLEncodedUtils.parse( resp.getEntity() );
137                    Map<String,String> paramMap = new HashMap<String,String>(params.size());
138                    for ( NameValuePair param : params ) 
139                            paramMap.put( param.getName(), param.getValue() );
140                    return paramMap;
141            }
142            
143            /**
144             * Parse an HTML document by passing it through the NekoHTML parser.
145             * @see SAXParser
146             * @see XmlSlurper#parse(Reader)
147             * @param resp HTTP response from which to parse content
148             * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)}
149             * @throws IOException
150             * @throws SAXException
151             */
152            public GPathResult parseHTML( HttpResponse resp ) throws IOException, SAXException {
153                    return new XmlSlurper( new org.cyberneko.html.parsers.SAXParser() )
154                            .parse( parseText( resp ) );
155            }
156            
157            /**
158             * Default parser used to decode an XML response.  
159             * @see XmlSlurper#parse(Reader)
160             * @param resp HTTP response from which to parse content
161             * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)}
162             * @throws IOException
163             * @throws SAXException
164             * @throws ParserConfigurationException
165             */
166            public GPathResult parseXML( HttpResponse resp ) throws IOException, SAXException, ParserConfigurationException {
167                    return new XmlSlurper().parse( parseText( resp ) );
168            }
169            
170            /**
171             * Default parser used to decode a JSON response.
172             * @param resp
173             * @return
174             * @throws IOException
175             */
176            public JSON parseJSON( HttpResponse resp ) throws IOException {
177                    // there is a bug in the JsonSlurper.parse method...
178                    String jsonTxt = DefaultGroovyMethods.getText( parseText( resp ) );                     
179                    return new JsonSlurper().parseText( jsonTxt );
180            }
181            
182            protected Map<String,Closure> registeredParsers = buildDefaultParserMap();
183            
184            /**
185             * Register a new parser for the given content-type.  The parser closure
186             * should accept an {@link HttpResponse} argument and return a type suitable
187             * to be passed to a {@link SendDelegate#getResponse() response handler}.
188             * The value returned from the parser closure is always the second parameter 
189             * of the response handler closure.  
190             * @param contentType  <code>content-type</code> string
191             * @param closure code that will parse the HttpResponse and return parsed 
192             *   data to the response handler. 
193             */
194            public void register( String contentType, Closure closure ) {
195                    registeredParsers.put( contentType, closure );
196            }
197            
198            /* Retrieve a parser for the given response content-type string.  This
199             * should usually not be called by a user.  The appropriate parser will
200             * be resolved prior to executing the response handler. 
201             * @param contentType
202             * @return parser that can interpret the given response content type,
203             *   or the default parser if no parser is registered for the given 
204             *   content-type.  It should NOT return a null value.
205             */
206            Closure get( String contentType ) { 
207                    Closure parser = registeredParsers.get(contentType);
208                    if ( parser == null ) {
209                            log.warn( "Cannot find parser for content-type: " + contentType 
210                                            + " -- using default parser.");
211                            parser = defaultParser;
212                    }
213                    return parser;
214            }
215            
216            /**
217             * Returns a map of default parsers.  Override this method to change 
218             * what parsers are registered by default.  You can of course call
219             * <code>super.buildDefaultParserMap()</code> and then add or remove 
220             * from that result as well.
221             */
222            protected Map<String,Closure> buildDefaultParserMap() {
223                    Map<String,Closure> parsers = new HashMap<String,Closure>();
224                    
225                    parsers.put( ContentType.BINARY.toString(), new MethodClosure( this, "parseStream" ) );
226                    parsers.put( ContentType.TEXT.toString(), new MethodClosure(this,"parseText") );
227                    parsers.put( ContentType.URLENC.toString(), new MethodClosure(this,"parseForm") );
228                    parsers.put( ContentType.HTML.toString(), new MethodClosure(this,"parseHTML") );
229                    
230                    Closure pClosure = new MethodClosure(this,"parseXML");
231                    for ( String ct : ContentType.XML.getContentTypeStrings() )
232                            parsers.put( ct, pClosure );
233                    
234                    pClosure = new MethodClosure(this,"parseJSON");
235                    for ( String ct : ContentType.JSON.getContentTypeStrings() )
236                            parsers.put( ct, pClosure );
237                    
238                    return parsers;
239            }
240    }