001 /* 002 * LAPIS lightweight structured text processing system 003 * 004 * Copyright (C) 1998-2002 Carnegie Mellon University, 005 * Copyright (C) 2003 Massachusetts Institute of Technology. 006 * All rights reserved. 007 * 008 * This library is free software; you can redistribute it 009 * and/or modify it under the terms of the GNU General 010 * Public License as published by the Free Software 011 * Foundation, version 2. 012 * 013 * LAPIS homepage: http://graphics.lcs.mit.edu/lapis/ 014 */ 015 016 package lapis.tools; 017 018 import lapis.*; 019 import lapis.tc.*; 020 import lapisx.util.Str; 021 //import java.io.*; 022 //import java.util.*; 023 024 public class Highlight implements Tool { 025 public static final lapisx.util.Debug debug = lapisx.util.Debug.QUIET; 026 027 static final String TEXT = "text/plain"; 028 static final String HTML = "text/html"; 029 static final String TCL_LIST = "application/x-tcl-list"; 030 031 /** 032 * Invoke tool. 033 */ 034 public Document invoke (Arguments args) throws Exception { 035 TC tcItems = null; 036 String contentType = HTML; 037 TC tcUnits = null; 038 039 args.setUsage ( 040 "Usage:\n" 041 + " highlight <pattern> highlights regions matching <pattern>.\n" 042 + " In text, highlights >>like this<<\n" 043 + " In HTML, highlights in bold\n" 044 + "\n" 045 + "Options:\n" 046 + " -as [text|html|same] Output type (default is html)\n" 047 + " -units <pattern> Units to round to\n" 048 + " -help Display this message\n" 049 ); 050 051 while (args.hasMoreElements ()) { 052 String name = args.nextName (); 053 if ("query".equals (name) 054 || (name == null && tcItems == null)) { 055 tcItems = args.nextPattern (); 056 } 057 else if ("as".equals (name)) { 058 String type = args.nextString (); 059 if ("text".equalsIgnoreCase (type) 060 || TEXT.equalsIgnoreCase (type)) 061 contentType = TEXT; 062 else if ("html".equalsIgnoreCase (type) 063 || HTML.equalsIgnoreCase (type)) 064 contentType = HTML; 065 else if ("same".equalsIgnoreCase (type)) 066 contentType = null; 067 else 068 args.usage (); 069 } 070 else if ("units".equals (name) || "unit".equals (name)) { 071 tcUnits = args.nextPattern (); 072 } 073 else 074 args.consume (name); 075 } 076 077 // Error if no pattern 078 if (tcItems == null) 079 args.usage (); 080 081 if (tcUnits == null) 082 tcUnits = new TC (new TC.Id (".Layout.Line")); 083 084 String endsWith, highlightStart, highlightEnd; 085 if (HTML.equals (contentType)) { 086 endsWith = "<br>\n"; 087 highlightStart = "<b>"; 088 highlightEnd = "</b>"; 089 } else { 090 endsWith = null; 091 highlightStart = ">>"; 092 highlightEnd = "<<"; 093 } 094 095 Document doc = args.getInput (); 096 DocumentWriter output = args.getOutput (); 097 String docContentType = (String) doc.getProperty (doc.MIMEProperty); 098 099 if (contentType == null) 100 contentType = docContentType; 101 102 BasicDocument.copyVitalProperties (output, doc); 103 if (contentType != null) 104 output.putProperty (doc.MIMEProperty, contentType); 105 106 RegionSet items = tcItems.matchWithWarnings (doc); 107 RegionSet units = tcUnits.matchWithWarnings (doc); 108 109 boolean html2text = (TEXT.equals (contentType) 110 && HTML.equals (docContentType)); 111 boolean text2html = (HTML.equals (contentType) 112 && TEXT.equals (docContentType)); 113 114 DocumentView view = doc.getDefaultView (); 115 if (html2text) { 116 view = doc.getCookedView(); 117 items = view.getVersion().convert (items.getSingleDocRegionSet(doc)); 118 units = view.getVersion().convert (units.getSingleDocRegionSet(doc)); 119 } 120 121 items = items.flatten (); 122 units = units.flatten (); 123 124 Relation recordItems = new Relation (); 125 MutableRegionSet records = new MutableRegionSet (); 126 RegionEnumeration e = items.regions (); 127 for (Region item = e.firstFast (); item != null; item = e.nextFast ()) { 128 Region record = (Region) item.include (units); 129 if (!recordItems.add (record, item)) 130 records.insert (record); 131 } 132 133 e = records.regions (); 134 for (Region record = e.firstFast (); record != null; record = e.nextFast ()) { 135 String rec = view.getText(record); 136 if (html2text) 137 rec = rec.trim (); 138 else if (text2html) { 139 rec = Str.replace (rec, "&", "&"); 140 rec = Str.replace (rec, "<", "<"); 141 rec = Str.replace (rec, ">", ">"); 142 } 143 144 int off = 0; // number of characters inserted in rec 145 RegionEnumeration re = recordItems.get (record).regions (); 146 for (Region item = re.firstFast (); 147 item != null; 148 item = re.nextFast ()) { 149 debug.println ("highlighting " + item + " in " + record); 150 int st = item.getStart () - record.getStart () + off; 151 int en = item.getEnd () - record.getStart () + off; 152 debug.println ("that's [" + st + "," + en + "] relative"); 153 rec = rec.substring (0, st) 154 + highlightStart 155 + rec.substring (st, en) 156 + highlightEnd 157 + rec.substring (en); 158 off += highlightStart.length () + highlightEnd.length (); 159 } 160 161 output.print (rec); 162 if (!rec.endsWith ("\n")) 163 output.println (); 164 output.print (endsWith); 165 } 166 167 return output; 168 } 169 }