001 /* 002 * LAPIS lightweight structured text processing system 003 * 004 * Copyright (C) 1998-2002 Carnegie Mellon University, 005 * Copyright (C) 2003 Massachusetts Institute of Technology. 006 * All rights reserved. 007 * 008 * This library is free software; you can redistribute it 009 * and/or modify it under the terms of the GNU General 010 * Public License as published by the Free Software 011 * Foundation, version 2. 012 * 013 * LAPIS homepage: http://graphics.lcs.mit.edu/lapis/ 014 */ 015 016 package lapisx.net; 017 018 import java.net.*; 019 import java.io.*; 020 //import java.util.*; 021 import lapisx.util.Str; 022 023 public abstract class URLUtil { 024 /** 025 * Convert a local filename to a URL. 026 * For example, if the filename is "C:\FOO\BAR\BAZ", 027 * the resulting URL is "file:/C:/FOO/BAR/BAZ". 028 * @param file File to convert 029 * @return URL corresponding to file 030 */ 031 public static URL FileToURL (File file) { 032 try { 033 String host = ""; 034 String path = file.getAbsolutePath (); 035 path = path.replace (File.separatorChar, '/'); 036 path = Str.replace (path, "%", "%25"); 037 path = Str.replace (path, "#", "%23"); 038 if (!path.startsWith ("/")) 039 path = "/" + path; 040 else if (path.startsWith ("//")) 041 host = "localhost"; 042 return new URL ("file", host, path); 043 } catch (MalformedURLException e) { 044 // shouldn't happen 045 throw new RuntimeException (e.toString ()); 046 } 047 } 048 049 /** 050 * Convert a file: URL to a filename appropriate to the 051 * current system platform. For example, on MS Windows, 052 * if the URL is "file:/FOO/BAR/BAZ", the resulting 053 * filename is "\FOO\BAR\BAZ". 054 * @param url URL to convert 055 * @return File corresponding to url 056 * @exception MalformedURLException if url is not a 057 * file: URL. 058 */ 059 public static File URLToFile (URL url) throws MalformedURLException { 060 if (!url.getProtocol().equals ("file")) 061 throw new MalformedURLException (); 062 063 String path = url.getFile (); 064 path = path.replace ('/', File.separatorChar); 065 path = Str.replace (path, "%23", "#"); 066 path = Str.replace (path, "%25", "%"); 067 // for MSWindows: change pathnames of the 068 // form /X:/ to X:/ 069 if (path.length () > 3 070 && path.charAt (0) == File.separatorChar 071 && path.charAt(2) == ':' 072 && path.charAt (3) == File.separatorChar) 073 path = path.substring (1); 074 075 return new File (path); 076 } 077 078 /** 079 * Convert a string (representing either a filename or a local file: URL) 080 * to a File. Tries to behave reasonably on common platforms 081 * (especially Win32 and Unix). 082 * <P> 083 * Examples: 084 * <UL><LI>HrefToFile ("file:/home/rcm") returns new File("/home/rcm"); 085 * <LI>HrefToFile ("C:\FOO\BAR\BAZ") returns new File("C:\FOO\BAR\BAZ"); 086 * <LI>HrefToFile ("http://www.yahoo.com/") throws MalformedURLException 087 * @param href String to convert 088 * @return File corresponding to href 089 * @exception MalformedURLException if href uses a network protocol, like http: or ftp:. 090 */ 091 public static File HrefToFile (String href) throws MalformedURLException { 092 return HrefToFile (href, null); 093 } 094 095 public static File HrefToFile (String href, File curdir) throws MalformedURLException { 096 File file = null; 097 098 try { 099 URL url = new URL (href); 100 String protocol = url.getProtocol (); 101 String host = url.getHost (); 102 String path = url.getFile (); 103 if (protocol.equals ("file") 104 && (host == null 105 || host.equals ("") 106 || host.equals ("localhost"))) { 107 path = path.replace ('/', File.separatorChar); 108 // for MSWindows: change pathnames of the 109 // form /X:/ to X:/ 110 if (path.length () > 3 111 && path.charAt (0) == File.separatorChar 112 && path.charAt(2) == ':' 113 && path.charAt (3) == File.separatorChar) 114 path = path.substring (1); 115 file = new File (path); 116 } 117 } catch (MalformedURLException e) { 118 file = new File (href); 119 if (!file.isAbsolute () && curdir != null) 120 file = new File (curdir, href); 121 } 122 123 if (file == null) 124 throw new MalformedURLException ("not a filename or file: URL"); 125 return file; 126 } 127 128 /** 129 * Convert a string (representing either a filename or a URL) to a URL. 130 * <P> 131 * Examples: 132 * <UL><LI>HrefToURL ("http://www.yahoo.com/") returns new URL("http://www.yahoo.com/)"; 133 * <LI>HrefToURL ("C:\FOO\BAR\BAZ") returns new URL("file:/C:/FOO/BAR/BAZ"); 134 * @param href String to convert 135 * @return URL corresponding to href 136 */ 137 public static URL HrefToURL (String href) { 138 return HrefToURL (href, null); 139 } 140 141 /** 142 * Convert a string (representing either a filename or a local URL) to a URL. 143 * <P> 144 * Examples: 145 * <UL><LI>HrefToURL ("http://www.yahoo.com/") returns new URL("http://www.yahoo.com/)"; 146 * <LI>HrefToURL ("C:\FOO\BAR\BAZ") returns new URL("file:/C:/FOO/BAR/BAZ"); 147 * @param href String to convert 148 * @param curdir Current directory, in case href is a relative filename 149 * @return URL corresponding to href 150 */ 151 public static URL HrefToURL (String href, File curdir) { 152 URL url; 153 154 try { 155 url = new URL (href); 156 157 String protocol = url.getProtocol (); 158 String host = url.getHost (); 159 String path = url.getFile (); 160 String ref = url.getRef (); 161 if (protocol.equals ("file") 162 && (host == null 163 || host.equals ("") 164 || host.equals ("localhost"))) { 165 // add a trailing slash to local directories; 166 // otherwise JDK 1.2 won't give them content-type text/html 167 if (path == null || path.length () == 0) 168 path = "/"; 169 File file = new File (path); 170 if (file.isDirectory () && !path.endsWith ("/")) 171 path += "/"; 172 url = new URL ("file:" + path); 173 if (ref != null && ref.length () > 0) 174 url = new URL (url, "#" + ref); 175 } 176 } 177 catch (MalformedURLException e) { 178 File f = new File (href); 179 if (!f.isAbsolute () && curdir != null) 180 f = new File (curdir, href); 181 url = FileToURL (f); 182 } 183 184 return url; 185 } 186 187 /** 188 * Get the URL of a page, omitting any anchor reference (like #ref). 189 * @param url URL to parse 190 * @return url sans anchor reference 191 */ 192 public static URL getPageURL (URL url) { 193 String href = url.toExternalForm (); 194 int i = href.indexOf ('#'); 195 try { 196 return (i != -1) ? new URL(href.substring (0, i)) : url; 197 } catch (MalformedURLException e) { 198 return url; 199 } 200 } 201 202 /** 203 * Get the URL of a Web service, omitting any query or anchor reference. 204 * @return the URL sans query and anchor reference 205 */ 206 public static URL getServiceURL (URL url) { 207 String href = url.toExternalForm (); 208 int i = href.indexOf ('?'); 209 try { 210 return (i != -1) 211 ? new URL(href.substring (0, i)) 212 : getPageURL(url); 213 } catch (MalformedURLException e) { 214 return url; 215 } 216 } 217 218 /** 219 * Get the URL of a page's directory. 220 * @param url URL to parse 221 * @return url sans filename, query and anchor reference 222 */ 223 public static URL getDirectoryURL (URL url) { 224 String file = url.getFile(); 225 int qmark = file.indexOf ('?'); 226 if (qmark == -1) 227 qmark = file.length(); 228 // find pivotal separator (between directory and filename) 229 int pivot = file.lastIndexOf ('/', Math.max(qmark-1, 0)); 230 try { 231 if (pivot == -1) 232 return new URL (url, "/"); 233 else if (pivot == file.length()-1) 234 return url; 235 else 236 return new URL (url, file.substring (0, pivot+1)); 237 } catch (MalformedURLException e) { 238 return url; 239 } 240 } 241 242 /** 243 * Get the URL of a page's parent directory. 244 * @param url URL to parse 245 * @return url sans filename, query and anchor reference 246 */ 247 public static URL getParentURL (URL url) { 248 URL dirURL = getDirectoryURL (url); 249 if (!dirURL.equals (url)) 250 return dirURL; 251 252 String dir = dirURL.getFile (); 253 int lastSlash = dir.length()-1; 254 if (lastSlash == 0) 255 return dirURL; 256 257 int penultSlash = dir.lastIndexOf ('/', lastSlash-1); 258 259 if (penultSlash == -1) 260 return dirURL; 261 262 try { 263 return new URL (url, dir.substring (0, penultSlash+1)); 264 } catch (MalformedURLException e) { 265 return dirURL; 266 } 267 } 268 269 /** 270 * Get the filename part of a URL, like "index.html". 271 * Never contains '/'; may be the empty string. 272 * @param url URL to parse 273 * @return the filename portion of url 274 */ 275 public static String getFilename (URL url) { 276 String file = url.getFile (); 277 String query = getQuery (url); 278 int qmark = file.length() - query.length (); 279 int slash = file.lastIndexOf ('/', qmark); 280 return file.substring (slash+1, qmark); 281 } 282 283 /** 284 * Get the query part of the link, like "?query". 285 * Either starts with a '?', or is the empty string. 286 * @param url URL to parse 287 * @return the query portion 288 */ 289 public static String getQuery (URL url) { 290 String file = url.getFile(); 291 int qmark = file.indexOf ('?'); 292 if (qmark == -1) 293 return ""; 294 else 295 return file.substring (qmark); 296 } 297 298 /** 299 * Get the basename part of a URL, which is the filename without extension: 300 * e.g., "index". Never contains '/' or '.'; may be the empty string. 301 * @param url URL to parse 302 * @return the basename portion of url 303 */ 304 public static String getBasename (URL url) { 305 String filename = getFilename (url); 306 int period = filename.indexOf ('.'); 307 return (period != -1) ? filename.substring (0, period) : filename; 308 } 309 310 /** 311 * Get the extension part of a URL, e.g. ".html". Either starts with 312 * a period or is the empty string. 313 * @param url URL to parse 314 * @return the basename portion of url 315 */ 316 public static String getExtension (URL url) { 317 String filename = getFilename (url); 318 int period = filename.indexOf ('.'); 319 return (period != -1) ? filename.substring (period) : ""; 320 } 321 322 /** 323 * Computes relative HREF for URL <I>there</I> when the current location 324 * is URL <I>here</I>. 325 */ 326 public static String relativeTo (URL here, URL there) { 327 if (here == null) 328 return there.toString(); 329 //System.err.println ("From: " + here); 330 //System.err.println ("To: " + there); 331 if (here.getProtocol().equals (there.getProtocol()) 332 && here.getHost().equals (there.getHost ()) 333 && here.getPort() == there.getPort ()) { 334 String fn = relativeTo (here.getFile (), 335 there.getFile ()); 336 String ref = there.getRef (); 337 return (ref != null) ? fn+ref : fn; 338 } 339 else { 340 //System.err.println ("Use: " + there); 341 return there.toString (); 342 } 343 } 344 345 /** 346 * Compute relative HREF for URL <i>there</i> when the current location 347 * is URL <i>here</i>. 348 */ 349 public static String relativeTo (URL here, String there) { 350 if (here == null) 351 return there; 352 try { 353 return relativeTo (here, new URL (here, there)); 354 } catch (MalformedURLException e) { 355 return there; 356 } 357 } 358 359 /** 360 * Compute relative HREF for filename <there> when the current location 361 * is filename <here. 362 */ 363 public static String relativeTo (String here, String there) { 364 StringBuffer result = new StringBuffer (); 365 366 int lcp = 0; 367 368 while (true) { 369 int i = here.indexOf ('/', lcp); 370 int j = there.indexOf ('/', lcp); 371 372 if (i == -1 || i != j || !here.regionMatches (lcp, there, lcp, i-lcp)) 373 break; 374 lcp = i+1; 375 } 376 377 // assert: first lcp characters of here and there are identical 378 // and (lcp==0 or here[lcp-1] == '/') 379 380 // here[0..lcp-1] is the common ancestor directory of here and there 381 382 // count hops up from here to the common ancestor 383 for (int i = here.indexOf ('/', lcp); 384 i != -1; 385 i = here.indexOf ('/', i+1)) { 386 result.append (".."); 387 result.append ('/'); 388 } 389 390 // append path down from common ancestor to there 391 result.append (there.substring (lcp)); 392 393 //System.out.println ("Use: " + result); 394 //System.out.println (); 395 396 return result.toString (); 397 } 398 399 /** 400 * Open a file or URL. 401 * @param href filename or URL to open 402 * @return stream of data from open file or Web page 403 */ 404 public static Reader open (String href) throws IOException { 405 return open (HrefToURL (href)); 406 } 407 408 /** 409 * Open a URL. 410 * @param url URL to open 411 * @return stream of data from url 412 */ 413 public static Reader open (URL url) throws IOException { 414 URLConnection conn = url.openConnection (); 415 InputStream in = conn.getInputStream (); 416 return new InputStreamReader (in); 417 } 418 }