001    /*
002     * LAPIS lightweight structured text processing system
003     *
004     * Copyright (C) 1998-2002 Carnegie Mellon University,
005     * Copyright (C) 2003 Massachusetts Institute of Technology.
006     * All rights reserved.
007     *
008     * This library is free software; you can redistribute it
009     * and/or modify it under the terms of the GNU General
010     * Public License as published by the Free Software
011     * Foundation, version 2.
012     *
013     * LAPIS homepage: http://graphics.lcs.mit.edu/lapis/
014     */
015    
016    package lapis.tools;
017    
018    import lapis.*;
019    import lapis.tc.*;
020    import lapisx.util.Str;
021    //import java.io.*;
022    //import java.util.*;
023    
024    public class Highlight implements Tool {
025        public static final lapisx.util.Debug debug = lapisx.util.Debug.QUIET;
026    
027        static final String TEXT = "text/plain";
028        static final String HTML = "text/html";
029        static final String TCL_LIST = "application/x-tcl-list";
030    
031        /**
032         * Invoke tool.
033         */
034        public Document invoke (Arguments args) throws Exception {
035            TC tcItems = null;
036            String contentType = HTML;
037            TC tcUnits = null; 
038    
039            args.setUsage (
040              "Usage:\n"
041            + "   highlight <pattern>     highlights regions matching <pattern>.\n"
042            + "                           In text, highlights >>like this<<\n"
043            + "                           In HTML, highlights in bold\n"
044            + "\n"
045            + "Options:\n"
046            + "    -as [text|html|same]   Output type (default is html)\n"
047            + "    -units <pattern>       Units to round to\n"
048            + "    -help                  Display this message\n"
049              );
050    
051            while (args.hasMoreElements ()) {
052                String name = args.nextName ();
053                if ("query".equals (name)
054                    || (name == null && tcItems == null)) {
055                    tcItems = args.nextPattern ();
056                }
057                else if ("as".equals (name)) {
058                    String type = args.nextString ();
059                    if ("text".equalsIgnoreCase (type)
060                        || TEXT.equalsIgnoreCase (type))
061                        contentType = TEXT;
062                    else if ("html".equalsIgnoreCase (type)
063                        || HTML.equalsIgnoreCase (type))
064                        contentType = HTML;
065                    else if ("same".equalsIgnoreCase (type))
066                        contentType = null;
067                    else
068                        args.usage ();
069                }
070                else if ("units".equals (name) || "unit".equals (name)) {
071                    tcUnits = args.nextPattern ();
072                }
073                else 
074                    args.consume (name);
075            }
076    
077            // Error if no pattern
078            if (tcItems == null)
079                args.usage ();
080    
081            if (tcUnits == null)
082                tcUnits = new TC (new TC.Id (".Layout.Line"));
083    
084            String endsWith, highlightStart, highlightEnd;
085            if (HTML.equals (contentType)) {
086                endsWith = "<br>\n";
087                highlightStart = "<b>";
088                highlightEnd = "</b>";
089            } else {
090                endsWith = null;
091                highlightStart = ">>";
092                highlightEnd = "<<";
093            }
094    
095            Document doc = args.getInput ();
096            DocumentWriter output = args.getOutput ();
097            String docContentType = (String) doc.getProperty (doc.MIMEProperty);
098    
099            if (contentType == null)
100                contentType = docContentType;
101    
102            BasicDocument.copyVitalProperties (output, doc);
103            if (contentType != null)
104                output.putProperty (doc.MIMEProperty, contentType);
105    
106            RegionSet items = tcItems.matchWithWarnings (doc);
107            RegionSet units = tcUnits.matchWithWarnings (doc);
108    
109            boolean html2text = (TEXT.equals (contentType)
110                                 && HTML.equals (docContentType));
111            boolean text2html = (HTML.equals (contentType)
112                                 && TEXT.equals (docContentType));
113    
114                    DocumentView view = doc.getDefaultView ();            
115            if (html2text) {
116                view = doc.getCookedView();
117                items = view.getVersion().convert (items.getSingleDocRegionSet(doc));
118                units = view.getVersion().convert (units.getSingleDocRegionSet(doc));
119            }
120    
121            items = items.flatten ();
122            units = units.flatten ();
123    
124            Relation recordItems = new Relation ();
125            MutableRegionSet records = new MutableRegionSet ();
126            RegionEnumeration e = items.regions ();
127            for (Region item = e.firstFast (); item != null; item = e.nextFast ()) {
128                Region record = (Region) item.include (units);
129                if (!recordItems.add (record, item))
130                    records.insert (record);
131            }
132    
133            e = records.regions (); 
134            for (Region record = e.firstFast (); record != null; record = e.nextFast ()) {
135                String rec = view.getText(record);
136                if (html2text)
137                    rec = rec.trim ();
138                else if (text2html) {
139                    rec = Str.replace (rec, "&", "&");
140                    rec = Str.replace (rec, "<", "<");
141                    rec = Str.replace (rec, ">", ">");
142                }
143                
144                int off = 0; // number of characters inserted in rec
145                RegionEnumeration re = recordItems.get (record).regions ();
146                for (Region item = re.firstFast ();
147                     item != null;
148                     item = re.nextFast ()) {
149                    debug.println ("highlighting " + item + " in " + record);
150                    int st = item.getStart () - record.getStart () + off;
151                    int en = item.getEnd () - record.getStart () + off;
152                    debug.println ("that's [" + st + "," + en + "] relative");
153                    rec = rec.substring (0, st)
154                        + highlightStart
155                        + rec.substring (st, en)
156                        + highlightEnd
157                        + rec.substring (en);
158                    off += highlightStart.length () + highlightEnd.length ();
159                }
160        
161                output.print (rec);
162                if (!rec.endsWith ("\n"))
163                    output.println ();
164                output.print (endsWith);
165            }
166    
167            return output;
168        }
169    }