001    /*
002     * LAPIS lightweight structured text processing system
003     *
004     * Copyright (C) 1998-2002 Carnegie Mellon University,
005     * Copyright (C) 2003 Massachusetts Institute of Technology.
006     * All rights reserved.
007     *
008     * This library is free software; you can redistribute it
009     * and/or modify it under the terms of the GNU General
010     * Public License as published by the Free Software
011     * Foundation, version 2.
012     *
013     * LAPIS homepage: http://graphics.lcs.mit.edu/lapis/
014     */
015    
016    package lapisx.net;
017    
018    import java.net.*;
019    import java.io.*;
020    //import java.util.*;
021    import lapisx.util.Str;
022    
/**
 * Static helpers for converting between local filenames and URLs, for
 * splitting a URL into its parts (directory, filename, query, extension)
 * and for computing relative hrefs.
 * <P>
 * All members are static; the class holds no state.
 */
public abstract class URLUtil {
    /**
     * Convert a local filename to a URL.
     * For example, if the filename is "C:\FOO\BAR\BAZ",
     * the resulting URL is "file:/C:/FOO/BAR/BAZ".
     * <P>
     * The characters '%' and '#' in the filename are escaped as "%25" and
     * "%23" so that they are not read as an escape prefix or as the start
     * of an anchor reference; {@link #URLToFile} reverses this escaping.
     * @param file File to convert
     * @return URL corresponding to file
     */
    public static URL FileToURL (File file) {
        String host = "";
        String path = file.getAbsolutePath().replace(File.separatorChar, '/');
        // Escape '%' first, so that the '%' of the "%23" produced on the
        // next line is not escaped a second time.
        path = path.replace("%", "%25");
        path = path.replace("#", "%23");
        if (!path.startsWith("/")) {
            // e.g. "C:/FOO" becomes "/C:/FOO"
            path = "/" + path;
        } else if (path.startsWith("//")) {
            // A path with two leading slashes is given an explicit host
            // (presumably so that the first path component is not taken
            // for the host name when the URL is printed and re-parsed).
            host = "localhost";
        }
        try {
            return new URL("file", host, path);
        } catch (MalformedURLException e) {
            // shouldn't happen; keep the original message and chain the cause
            throw new RuntimeException(e.toString(), e);
        }
    }

    /**
     * Convert a file: URL to a filename appropriate to the
     * current system platform.  For example, on MS Windows,
     * if the URL is "file:/FOO/BAR/BAZ", the resulting
     * filename is "\FOO\BAR\BAZ".
     * <P>
     * The host part of the URL is ignored, and "%23" and "%25" are decoded
     * back to '#' and '%'.
     * @param url URL to convert
     * @return File corresponding to url
     * @exception MalformedURLException if url is not a
     * file: URL.
     */
    public static File URLToFile (URL url) throws MalformedURLException {
        if (!"file".equals(url.getProtocol())) {
            throw new MalformedURLException("not a file: URL: " + url);
        }
        return localPathToFile(url.getFile());
    }

    /**
     * Convert a string (representing either a filename or a local file: URL)
     * to a File. Tries to behave reasonably on common platforms
     * (especially Win32 and Unix).
     * <P>
     * Examples:
     * <UL><LI>HrefToFile ("file:/home/rcm") returns new File("/home/rcm");
     *     <LI>HrefToFile ("C:\FOO\BAR\BAZ") returns new File("C:\FOO\BAR\BAZ");
     *     <LI>HrefToFile ("http://www.yahoo.com/") throws MalformedURLException
     * </UL>
     * @param href String to convert
     * @return File corresponding to href
     * @exception MalformedURLException if href uses a network protocol, like http: or ftp:.
     */
    public static File HrefToFile (String href) throws MalformedURLException {
        return HrefToFile(href, null);
    }

    /**
     * Convert a string (representing either a filename or a local file: URL)
     * to a File, resolving relative filenames against a given directory.
     * <P>
     * If href parses as a URL, it must be a file: URL whose host is empty or
     * "localhost"; its path is converted exactly as {@link #URLToFile} does
     * (including decoding of "%23" and "%25").  If href does not parse as a
     * URL, it is taken to be a filename.
     * @param href String to convert
     * @param curdir Directory used to resolve href when it is a relative
     * filename; may be null, in which case the filename is used as given
     * @return File corresponding to href
     * @exception MalformedURLException if href is a URL but not a local file: URL.
     */
    public static File HrefToFile (String href, File curdir) throws MalformedURLException {
        URL url;
        try {
            url = new URL(href);
        } catch (MalformedURLException e) {
            // not a URL at all, so treat href as a plain filename
            File file = new File(href);
            if (!file.isAbsolute() && curdir != null) {
                file = new File(curdir, href);
            }
            return file;
        }

        if (!isLocalFileURL(url)) {
            throw new MalformedURLException("not a filename or file: URL");
        }
        return localPathToFile(url.getFile());
    }

    /**
     * Convert a string (representing either a filename or a URL) to a URL.
     * <P>
     * Examples:
     * <UL><LI>HrefToURL ("http://www.yahoo.com/") returns new URL("http://www.yahoo.com/");
     *     <LI>HrefToURL ("C:\FOO\BAR\BAZ") returns new URL("file:/C:/FOO/BAR/BAZ");
     * </UL>
     * @param href String to convert
     * @return URL corresponding to href
     */
    public static URL HrefToURL (String href) {
        return HrefToURL(href, null);
    }

    /**
     * Convert a string (representing either a filename or a URL) to a URL,
     * resolving relative filenames against a given directory.
     * <P>
     * Examples:
     * <UL><LI>HrefToURL ("http://www.yahoo.com/") returns new URL("http://www.yahoo.com/");
     *     <LI>HrefToURL ("C:\FOO\BAR\BAZ") returns new URL("file:/C:/FOO/BAR/BAZ");
     * </UL>
     * A local file: URL (host empty or "localhost") is rewritten without its
     * host, and gets a trailing slash when it names an existing directory.
     * Any other URL is returned as parsed.
     * @param href String to convert
     * @param curdir Current directory, in case href is a relative filename;
     * may be null
     * @return URL corresponding to href
     */
    public static URL HrefToURL (String href, File curdir) {
        try {
            URL url = new URL(href);
            return isLocalFileURL(url) ? normalizeLocalURL(url) : url;
        } catch (MalformedURLException e) {
            // not a URL, so treat href as a filename
            File f = new File(href);
            if (!f.isAbsolute() && curdir != null) {
                f = new File(curdir, href);
            }
            return FileToURL(f);
        }
    }

    /**
     * Get the URL of a page, omitting any anchor reference (like #ref).
     * @param url URL to parse
     * @return url sans anchor reference (url itself if it has none)
     */
    public static URL getPageURL (URL url) {
        String href = url.toExternalForm();
        int hash = href.indexOf('#');
        if (hash == -1) {
            return url;
        }
        try {
            return new URL(href.substring(0, hash));
        } catch (MalformedURLException e) {
            return url;
        }
    }

    /**
     * Get the URL of a Web service, omitting any query or anchor reference.
     * @param url URL to parse
     * @return the URL sans query and anchor reference (url itself if it
     * has neither)
     */
    public static URL getServiceURL (URL url) {
        String href = url.toExternalForm();
        // The anchor reference always ends the URL, so a '?' that follows
        // the '#' belongs to the reference, not to a query.  Cut at
        // whichever of the two markers comes first.
        int cut = href.indexOf('#');
        if (cut == -1) {
            cut = href.length();
        }
        int qmark = href.indexOf('?');
        if (qmark != -1 && qmark < cut) {
            cut = qmark;
        }
        if (cut == href.length()) {
            return url;
        }
        try {
            return new URL(href.substring(0, cut));
        } catch (MalformedURLException e) {
            return url;
        }
    }

    /**
     * Get the URL of a page's directory.
     * @param url URL to parse
     * @return url sans filename, query and anchor reference
     */
    public static URL getDirectoryURL (URL url) {
        String file = url.getFile();
        int qmark = file.indexOf('?');
        if (qmark == -1) {
            qmark = file.length();
        }
        // find pivotal separator (between directory and filename),
        // ignoring any slashes inside the query
        int pivot = file.lastIndexOf('/', Math.max(qmark - 1, 0));
        try {
            if (pivot == -1) {
                return new URL(url, "/");
            }
            // url can be returned as is only when it is already a bare
            // directory: nothing after the last slash and no anchor reference
            if (pivot == file.length() - 1 && url.getRef() == null) {
                return url;
            }
            return new URL(url, file.substring(0, pivot + 1));
        } catch (MalformedURLException e) {
            return url;
        }
    }

    /**
     * Get the URL of a page's parent directory.
     * <P>
     * If url names a file or carries a query, the result is the directory
     * containing it (the same as {@link #getDirectoryURL}).  If url is
     * already a directory, the result is the directory one level up; the
     * root directory is its own parent.
     * @param url URL to parse
     * @return URL of the parent directory, without query or anchor reference
     */
    public static URL getParentURL (URL url) {
        URL dirURL = getDirectoryURL(url);
        String dir = dirURL.getFile();

        // dirURL differs from url only in its file part and anchor
        // reference, so comparing the file parts is enough.  URL.equals is
        // avoided because it may resolve host names over the network.
        if (!dir.equals(url.getFile())) {
            return dirURL;
        }

        int lastSlash = dir.length() - 1;
        if (lastSlash == 0) {
            return dirURL;      // already at the root "/"
        }

        int penultSlash = dir.lastIndexOf('/', lastSlash - 1);
        if (penultSlash == -1) {
            return dirURL;
        }

        try {
            return new URL(url, dir.substring(0, penultSlash + 1));
        } catch (MalformedURLException e) {
            return dirURL;
        }
    }

    /**
     * Get the filename part of a URL, like "index.html".
     * Never contains '/'; may be the empty string.
     * @param url URL to parse
     * @return the filename portion of url
     */
    public static String getFilename (URL url) {
        String file = url.getFile();
        int end = file.indexOf('?');
        if (end == -1) {
            end = file.length();
        }
        // last slash before the query; -1 if none, which makes the
        // substring below start at 0
        int slash = file.lastIndexOf('/', end);
        return file.substring(slash + 1, end);
    }

    /**
     * Get the query part of the link, like "?query".
     * Either starts with a '?', or is the empty string.
     * @param url URL to parse
     * @return the query portion
     */
    public static String getQuery (URL url) {
        String file = url.getFile();
        int qmark = file.indexOf('?');
        return (qmark == -1) ? "" : file.substring(qmark);
    }

    /**
     * Get the basename part of a URL, which is the filename without extension:
     * e.g., "index". Never contains '/' or '.'; may be the empty string.
     * The basename ends at the first period of the filename.
     * @param url URL to parse
     * @return the basename portion of url
     */
    public static String getBasename (URL url) {
        String filename = getFilename(url);
        int period = filename.indexOf('.');
        return (period != -1) ? filename.substring(0, period) : filename;
    }

    /**
     * Get the extension part of a URL, e.g. ".html".  Either starts with
     * a period or is the empty string.  The extension starts at the first
     * period of the filename, so "archive.tar.gz" has extension ".tar.gz".
     * @param url URL to parse
     * @return the extension portion of url
     */
    public static String getExtension (URL url) {
        String filename = getFilename(url);
        int period = filename.indexOf('.');
        return (period != -1) ? filename.substring(period) : "";
    }

    /**
     * Computes relative HREF for URL <I>there</I> when the current location
     * is URL <I>here</I>.
     * <P>
     * If the two URLs differ in protocol, host or port, or if here is null,
     * the result is the absolute form of there.  Otherwise it is a relative
     * path, followed by the query and "#ref" of there when present.
     * @param here current location; may be null
     * @param there URL to refer to
     * @return href that leads from here to there
     */
    public static String relativeTo (URL here, URL there) {
        if (here == null) {
            return there.toString();
        }
        boolean sameServer = here.getProtocol().equals(there.getProtocol())
            && here.getHost().equals(there.getHost())
            && here.getPort() == there.getPort();
        if (!sameServer) {
            return there.toString();
        }

        // Only the path of here matters: slashes inside its query must not
        // be counted as directory levels.  The query of there is kept so
        // that it appears in the result.
        String fn = relativeTo(here.getPath(), there.getFile());
        String ref = there.getRef();
        return (ref != null) ? fn + "#" + ref : fn;
    }

    /**
     * Compute relative HREF for href <i>there</i> when the current location
     * is URL <i>here</i>.
     * @param here current location; may be null
     * @param there href to refer to, absolute or relative to here
     * @return href that leads from here to there; there itself if here is
     * null or there cannot be parsed
     */
    public static String relativeTo (URL here, String there) {
        if (here == null) {
            return there;
        }
        try {
            return relativeTo(here, new URL(here, there));
        } catch (MalformedURLException e) {
            return there;
        }
    }

    /**
     * Compute relative HREF for filename <i>there</i> when the current
     * location is filename <i>here</i>.  Both names use '/' as separator.
     * For example, relativeTo ("/a/b/c.html", "/a/d/e.html") returns
     * "../d/e.html".
     * @param here filename of the current location
     * @param there filename to refer to
     * @return path that leads from here to there
     */
    public static String relativeTo (String here, String there) {
        // Find the longest common prefix that ends on a directory boundary.
        int lcp = 0;
        while (true) {
            int i = here.indexOf('/', lcp);
            int j = there.indexOf('/', lcp);
            if (i == -1 || i != j || !here.regionMatches(lcp, there, lcp, i - lcp)) {
                break;
            }
            lcp = i + 1;
        }

        // assert: first lcp characters of here and there are identical
        //         and (lcp==0 or here[lcp-1] == '/')
        // here[0..lcp-1] is the common ancestor directory of here and there

        StringBuilder result = new StringBuilder();

        // one hop up for every directory level of here below the ancestor
        for (int i = here.indexOf('/', lcp); i != -1; i = here.indexOf('/', i + 1)) {
            result.append("../");
        }

        // append path down from common ancestor to there
        result.append(there.substring(lcp));
        return result.toString();
    }

    /**
     * Open a file or URL.
     * @param href filename or URL to open
     * @return stream of data from open file or Web page
     * @exception IOException if the connection cannot be opened
     */
    public static Reader open (String href) throws IOException {
        return open(HrefToURL(href));
    }

    /**
     * Open a URL.  The bytes are decoded with the platform's default
     * character encoding.  The caller is responsible for closing the
     * returned reader.
     * @param url URL to open
     * @return stream of data from url
     * @exception IOException if the connection cannot be opened
     */
    public static Reader open (URL url) throws IOException {
        URLConnection conn = url.openConnection();
        InputStream in = conn.getInputStream();
        return new InputStreamReader(in);
    }

    /**
     * Tests whether url is a file: URL that refers to the local machine,
     * i.e. its host is null, empty or "localhost".
     */
    private static boolean isLocalFileURL (URL url) {
        if (!"file".equals(url.getProtocol())) {
            return false;
        }
        String host = url.getHost();
        return host == null || host.length() == 0 || host.equals("localhost");
    }

    /**
     * Converts the file part of a file: URL to a File for the current
     * platform, undoing the escaping applied by FileToURL.
     */
    private static File localPathToFile (String urlPath) {
        String path = urlPath.replace('/', File.separatorChar);
        // "%23" must be decoded before "%25": otherwise an escaped '%'
        // followed by "23" would be turned into '#'.
        path = path.replace("%23", "#");
        path = path.replace("%25", "%");
        // for MSWindows: change pathnames of the
        // form /X:/ to X:/
        if (path.length() > 3
            && path.charAt(0) == File.separatorChar
            && path.charAt(2) == ':'
            && path.charAt(3) == File.separatorChar) {
            path = path.substring(1);
        }
        return new File(path);
    }

    /**
     * Rewrites a local file: URL without its host, adding a trailing slash
     * when it names an existing directory and keeping its anchor reference.
     */
    private static URL normalizeLocalURL (URL url) throws MalformedURLException {
        String path = url.getFile();
        String ref = url.getRef();
        if (path == null || path.length() == 0) {
            path = "/";
        }
        // add a trailing slash to local directories;
        // otherwise JDK 1.2 won't give them content-type text/html
        if (!path.endsWith("/") && localPathToFile(path).isDirectory()) {
            path += "/";
        }
        URL result = new URL("file:" + path);
        if (ref != null && ref.length() > 0) {
            result = new URL(result, "#" + ref);
        }
        return result;
    }
}