001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package examples.mail;
019
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.net.PrintCommandListener;
import org.apache.commons.net.ProtocolCommandEvent;
import org.apache.commons.net.imap.IMAP;
import org.apache.commons.net.imap.IMAP.IMAPChunkListener;
import org.apache.commons.net.imap.IMAPClient;
import org.apache.commons.net.imap.IMAPReply;
041import org.apache.commons.net.imap.IMAPReply;
042
043/**
044 * This is an example program demonstrating how to use the IMAP[S]Client class.
 * This program connects to an IMAP[S] server and exports selected messages from a folder into an mbox file.
046 * <p>
 * Usage: IMAPExportMbox imap[s]://user:password@host[:port]/folder/path &lt;mboxfile&gt; [sequence-set] [item-names]
048 * <p>
049 * An example sequence-set might be:
050 * <ul>
051 * <li>11,2,3:10,20:*</li>
052 * <li>1:* - this is the default</li>
053 * </ul>
054 * <p>
055 * Some example item-names might be:
056 * <ul>
057 * <li>BODY.PEEK[HEADER]</li>
058 * <li>'BODY.PEEK[HEADER.FIELDS (SUBJECT)]'</li>
059 * <li>ALL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE)'</li>
060 * <li>FAST - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE)'</li>
061 * <li>FULL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE BODY)'</li>
062 * <li>ENVELOPE X-GM-LABELS</li>
063 * <li>'(INTERNALDATE BODY.PEEK[])' - this is the default</li>
064 * </ul>
065 * <p>
066 * Macro names cannot be combined with anything else; they must be used alone.<br>
067 * Note that using BODY will set the \Seen flag. This is why the default uses BODY.PEEK[].<br>
068 * The item name X-GM-LABELS is a Google Mail extension; it shows the labels for a message.<br>
069 * For example:<br>
070 * IMAPExportMbox imaps://username:password@imap.googlemail.com/messages_for_export exported.mbox 1:10,20<br>
071 * IMAPExportMbox imaps://username:password@imap.googlemail.com/messages_for_export exported.mbox 3 ENVELOPE X-GM-LABELS<br>
072 * <p>
073 * The sequence-set is passed unmodified to the FETCH command.<br>
074 * The item names are wrapped in parentheses if more than one is provided.
075 * Otherwise, the parameter is assumed to be wrapped if necessary.<br>
076 * Parameters with spaces must be quoted otherwise the OS shell will normally treat them as separate parameters.<br>
077 * Also the listener that writes the mailbox only captures the multi-line responses (e.g. ones that include BODY references).
078 * It does not capture the output from FETCH commands using item names such as ENVELOPE or FLAGS that return a single line response.
079 */
080public final class IMAPExportMbox
081{
082
083    private static final String CRLF = "\r\n";
084    private static final String LF = "\n";
085    private static final String EOL_DEFAULT = System.getProperty("line.separator");
086
087    private static final Pattern PATFROM = Pattern.compile(">*From "); // unescaped From_
088    // e.g. * nnn (INTERNALDATE "27-Oct-2013 07:43:24 +0000"  BODY[] {nn} ...)
089    private static final Pattern PATID = // INTERNALDATE
090            Pattern.compile(".*INTERNALDATE \"(\\d\\d-\\w{3}-\\d{4} \\d\\d:\\d\\d:\\d\\d [+-]\\d+)\"");
091    private static final int PATID_DATE_GROUP = 1;
092
093    private static final Pattern PATSEQ = Pattern.compile("\\* (\\d+) "); // Sequence number
094    private static final int PATSEQ_SEQUENCE_GROUP = 1;
095
096    // e.g. * 382 EXISTS
097    private static final Pattern PATEXISTS = Pattern.compile("\\* (\\d+) EXISTS"); // Response from SELECT
098
099    // AAAC NO [TEMPFAIL] FETCH Temporary failure on server [CODE: WBL]
100    private static final Pattern PATTEMPFAIL = Pattern.compile("[A-Z]{4} NO \\[TEMPFAIL\\] FETCH .*");
101
102    private static final int CONNECT_TIMEOUT = 10; // Seconds
103    private static final int READ_TIMEOUT = 10;
104
105    public static void main(String[] args) throws IOException, URISyntaxException
106    {
107        int connect_timeout = CONNECT_TIMEOUT;
108        int read_timeout = READ_TIMEOUT;
109
110        int argIdx = 0;
111        String eol = EOL_DEFAULT;
112        boolean printHash = false;
113        boolean printMarker = false;
114        int retryWaitSecs = 0;
115
116        for(argIdx = 0; argIdx < args.length; argIdx++) {
117            if (args[argIdx].equals("-c")) {
118                connect_timeout = Integer.parseInt(args[++argIdx]);
119            } else if (args[argIdx].equals("-r")) {
120                read_timeout = Integer.parseInt(args[++argIdx]);
121            } else if (args[argIdx].equals("-R")) {
122                retryWaitSecs = Integer.parseInt(args[++argIdx]);
123            } else if (args[argIdx].equals("-LF")) {
124                eol = LF;
125            } else if (args[argIdx].equals("-CRLF")) {
126                eol = CRLF;
127            } else if (args[argIdx].equals("-.")) {
128                printHash = true;
129            } else if (args[argIdx].equals("-X")) {
130                printMarker = true;
131            } else {
132                break;
133            }
134        }
135
136        final int argCount = args.length - argIdx;
137
138        if (argCount < 2)
139        {
140            System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]" +
141                               " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]");
142            System.err.println("\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)");
143            System.err.println("\t-c connect timeout in seconds (default 10)");
144            System.err.println("\t-r read timeout in seconds (default 10)");
145            System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)");
146            System.err.println("\t-. print a . for each complete message received");
147            System.err.println("\t-X print the X-IMAP line for each complete message received");
148            System.err.println("\tthe mboxfile is where the messages are stored; use '-' to write to standard output.");
149            System.err.println("\tPrefix filename with '+' to append to the file. Prefix with '-' to allow overwrite.");
150            System.err.println("\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3-10,20:* - default 1:*");
151            System.err.println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]" +
152                               " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])");
153            System.exit(1);
154        }
155
156        final String uriString = args[argIdx++];
157        URI uri;
158        try {
159            uri      = URI.create(uriString);
160        } catch(IllegalArgumentException e) { // cannot parse the path as is; let's pull it apart and try again
161            Matcher m = Pattern.compile("(imaps?://[^/]+)(/.*)").matcher(uriString);
162            if (m.matches()) {
163                uri = URI.create(m.group(1)); // Just the scheme and auth parts
164                uri = new URI(uri.getScheme(), uri.getAuthority(), m.group(2), null, null);
165            } else {
166                throw e;
167            }
168        }
169        final String file  = args[argIdx++];
170        String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*";
171        final String itemNames;
172        // Handle 0, 1 or multiple item names
173        if (argCount > 3) {
174            if (argCount > 4) {
175                StringBuilder sb = new StringBuilder();
176                sb.append("(");
177                for(int i=4; i <= argCount; i++) {
178                    if (i>4) {
179                        sb.append(" ");
180                    }
181                    sb.append(args[argIdx++]);
182                }
183                sb.append(")");
184                itemNames = sb.toString();
185            } else {
186                itemNames = args[argIdx++];
187            }
188        } else {
189            itemNames = "(INTERNALDATE BODY.PEEK[])";
190        }
191
192        final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)"); // are we expecting a sequence?
193        final MboxListener chunkListener;
194        if (file.equals("-")) {
195            chunkListener = null;
196        } else if (file.startsWith("+")) {
197            final File mbox = new File(file.substring(1));
198            System.out.println("Appending to file " + mbox);
199            chunkListener = new MboxListener(
200                new BufferedWriter(new FileWriter(mbox, true)), eol, printHash, printMarker, checkSequence);
201        } else if (file.startsWith("-")) {
202            final File mbox = new File(file.substring(1));
203            System.out.println("Writing to file " + mbox);
204            chunkListener = new MboxListener(
205                new BufferedWriter(new FileWriter(mbox, false)), eol, printHash, printMarker, checkSequence);
206        } else {
207            final File mbox = new File(file);
208            if (mbox.exists() && mbox.length() > 0) {
209                throw new IOException("mailbox file: " + mbox + " already exists and is non-empty!");
210            }
211            System.out.println("Creating file " + mbox);
212            chunkListener = new MboxListener(new BufferedWriter(new FileWriter(mbox)), eol, printHash, printMarker, checkSequence);
213        }
214
215        String path = uri.getPath();
216        if (path == null || path.length() < 1) {
217            throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
218        }
219        String folder = path.substring(1); // skip the leading /
220
221        // suppress login details
222        final PrintCommandListener listener = new PrintCommandListener(System.out, true) {
223            @Override
224            public void protocolReplyReceived(ProtocolCommandEvent event) {
225                if (event.getReplyCode() != IMAPReply.PARTIAL){ // This is dealt with by the chunk listener
226                    super.protocolReplyReceived(event);
227                }
228            }
229        };
230
231        // Connect and login
232        final IMAPClient imap = IMAPUtils.imapLogin(uri, connect_timeout * 1000, listener);
233
234        String maxIndexInFolder = null;
235
236        try {
237
238            imap.setSoTimeout(read_timeout * 1000);
239
240            if (!imap.select(folder)){
241                throw new IOException("Could not select folder: " + folder);
242            }
243
244            for(String line : imap.getReplyStrings()) {
245                maxIndexInFolder = matches(line, PATEXISTS, 1);
246                if (maxIndexInFolder != null) {
247                    break;
248                }
249            }
250
251            if (chunkListener != null) {
252                imap.setChunkListener(chunkListener);
253            } // else the command listener displays the full output without processing
254
255
256            while(true) {
257                boolean ok = imap.fetch(sequenceSet, itemNames);
258                // If the fetch failed, can we retry?
259                if (!ok && retryWaitSecs > 0 && chunkListener != null && checkSequence) {
260                    final String replyString = imap.getReplyString(); //includes EOL
261                    if (startsWith(replyString, PATTEMPFAIL)) {
262                        System.err.println("Temporary error detected, will retry in " + retryWaitSecs + "seconds");
263                        sequenceSet = (chunkListener.lastSeq+1)+":*";
264                        try {
265                            Thread.sleep(retryWaitSecs * 1000);
266                        } catch (InterruptedException e) {
267                            // ignored
268                        }
269                    } else {
270                        throw new IOException("FETCH " + sequenceSet + " " + itemNames+ " failed with " + replyString);
271                    }
272                } else {
273                    break;
274                }
275            }
276
277        } catch (IOException ioe) {
278            String count = chunkListener == null ? "?" : Integer.toString(chunkListener.total);
279            System.err.println(
280                    "FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count + " complete messages ");
281            if (chunkListener != null) {
282                System.err.println("Last complete response seen: "+chunkListener.lastFetched);
283            }
284            throw ioe;
285        } finally {
286
287            if (printHash) {
288                System.err.println();
289            }
290
291            if (chunkListener != null) {
292                chunkListener.close();
293                final Iterator<String> missingIds = chunkListener.missingIds.iterator();
294                if (missingIds.hasNext()) {
295                    StringBuilder sb = new StringBuilder();
296                    for(;;) {
297                        sb.append(missingIds.next());
298                        if (!missingIds.hasNext()) {
299                            break;
300                        }
301                        sb.append(",");
302                    }
303                    System.err.println("*** Missing ids: " + sb.toString());
304                }
305            }
306            imap.logout();
307            imap.disconnect();
308        }
309        if (chunkListener != null) {
310            System.out.println("Processed " + chunkListener.total + " messages.");
311        }
312        if (maxIndexInFolder != null) {
313            System.out.println("Folder contained " + maxIndexInFolder + " messages.");
314        }
315    }
316
317    private static boolean startsWith(String input, Pattern pat) {
318        Matcher m = pat.matcher(input);
319        return m.lookingAt();
320    }
321
322    private static String matches(String input, Pattern pat, int index) {
323        Matcher m = pat.matcher(input);
324        if (m.lookingAt()) {
325            return m.group(index);
326        }
327        return null;
328    }
329
330    private static class MboxListener implements IMAPChunkListener {
331
332        private final BufferedWriter bw;
333        volatile int total = 0;
334        volatile String lastFetched;
335        volatile List<String> missingIds = new ArrayList<String>();
336        volatile long lastSeq = -1;
337        private final String eol;
338        private final SimpleDateFormat DATE_FORMAT // for mbox From_ lines
339            = new SimpleDateFormat("EEE MMM dd HH:mm:ss YYYY");
340
341        // e.g. INTERNALDATE "27-Oct-2013 07:43:24 +0000"
342        private final SimpleDateFormat IDPARSE // for parsing INTERNALDATE
343        = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss Z");
344        private final boolean printHash;
345        private final boolean printMarker;
346        private final boolean checkSequence;
347
348        MboxListener(BufferedWriter bw, String eol, boolean printHash, boolean printMarker, boolean checkSequence)
349                throws IOException {
350          this.eol = eol;
351          this.printHash = printHash;
352          this.printMarker = printMarker;
353          DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("GMT"));
354          this.bw = bw;
355          this.checkSequence = checkSequence;
356        }
357
358        @Override
359        public boolean chunkReceived(IMAP imap) {
360            final String[] replyStrings = imap.getReplyStrings();
361            Date received = new Date();
362            final String firstLine = replyStrings[0];
363            Matcher m = PATID.matcher(firstLine);
364            if (m.lookingAt()) { // found a match
365                String date = m.group(PATID_DATE_GROUP);
366                try {
367                    received=IDPARSE.parse(date);
368                } catch (ParseException e) {
369                    System.err.println(e);
370                }
371            } else {
372                System.err.println("No timestamp found in: " + firstLine + "  - using current time");
373            }
374            String replyTo = "MAILER-DAEMON"; // default
375            for(int i=1; i< replyStrings.length - 1; i++) {
376                final String line = replyStrings[i];
377                if (line.startsWith("Return-Path: ")) {
378                   String[] parts = line.split(" ", 2);
379                    replyTo = parts[1];
380                    if (replyTo.startsWith("<")) {
381                        replyTo = replyTo.substring(1,replyTo.length()-1); // drop <> wrapper
382                    } else {
383                        System.err.println("Unexpected Return-path:" + line+ " in " + firstLine);
384                    }
385                    break;
386                }
387            }
388            try {
389                // Add initial mbox header line
390                bw.append("From ");
391                bw.append(replyTo);
392                bw.append(' ');
393                bw.append(DATE_FORMAT.format(received));
394                bw.append(eol);
395                // Debug
396                bw.append("X-IMAP-Response: ").append(firstLine).append(eol);
397                if (printMarker) {
398                    System.err.println("[" + total + "] " + firstLine);
399                }
400                // Skip first and last lines
401                for(int i=1; i< replyStrings.length - 1; i++) {
402                    final String line = replyStrings[i];
403                        if (startsWith(line, PATFROM)) {
404                            bw.append('>'); // Escape a From_ line
405                        }
406                        bw.append(line);
407                        bw.append(eol);
408                }
409                // The last line ends with the trailing closing ")" which needs to be stripped
410                String lastLine = replyStrings[replyStrings.length-1];
411                final int lastLength = lastLine.length();
412                if (lastLength > 1) { // there's some content, we need to save it
413                    bw.append(lastLine, 0, lastLength-1);
414                    bw.append(eol);
415                }
416                bw.append(eol); // blank line between entries
417            } catch (IOException e) {
418                e.printStackTrace();
419                throw new RuntimeException(e); // chunkReceived cannot throw a checked Exception
420            }
421            lastFetched = firstLine;
422            total++;
423            if (checkSequence) {
424                m = PATSEQ.matcher(firstLine);
425                if (m.lookingAt()) { // found a match
426                    final long msgSeq = Long.parseLong(m.group(PATSEQ_SEQUENCE_GROUP)); // Cannot fail to parse
427                    if (lastSeq != -1) {
428                        long missing = msgSeq - lastSeq - 1;
429                        if (missing != 0) {
430                            for(long j = lastSeq + 1; j < msgSeq; j++) {
431                                missingIds.add(String.valueOf(j));
432                            }
433                            System.err.println(
434                                "*** Sequence error: current=" + msgSeq + " previous=" + lastSeq + " Missing=" + missing);
435                        }
436                    }
437                    lastSeq = msgSeq;
438                }
439            }
440            if (printHash) {
441                System.err.print(".");
442            }
443            return true;
444        }
445
446        public void close() throws IOException {
447            if (bw != null) {
448                bw.close();
449            }
450        }
451    }
452}