Logo Search packages:      
Sourcecode: mailliststat version File versions  Download package

mls_stat.c

/****************************************************************************
    MailListStat - print useful statistics on email messages
    stats gathering & computing functions
    Copyright (C) 2001-2003  Marek Podmaka <marki@nexin.sk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 ****************************************************************************/

#include "mls.h"
#include "mls_stat.h"
#include "mls_mime.h"
#include "mls_list.h"

/* Shared program state, declared here and defined elsewhere. */
// non-zero enables the progress lines printed by PrintProgress()
extern int verbose;
// t: filled by time() once ParseInput() has read all input;
// t_oldest/t_newest: earliest/latest message date recorded by AddStat()
extern time_t t, t_oldest, t_newest;
// nSprav: number of messages started so far (incremented in ParseInput());
// nDlzka: summed length of all body lines
extern long nSprav, nDlzka;
// message counters indexed by day of month - 1 (0..30)
extern long nDen[31];
// message counters indexed by hour (0..23)
extern long nHod[24];
// message counters indexed by day of week (0=Mon .. 6=Sun, see AddStat())
extern long nDOW[ 7];
// message counters indexed by month - 1 (0=Jan .. 11=Dec)
extern long nMon[12];
// record of the message with the highest quoted/total ratio (updated in AddStat())
extern nQptr msgQ;
// record of the largest message (updated in AddStat())
extern nDptr msgD;
// statistics lists; AddStat() feeds zozA (authors), zozS (subjects) and
// zozM (mailers) through AddEntry(); the others are not touched in this file
extern nTptr zozA, zozS, zozQ, zozM, zozAt, zozAa;
// input stream read line by line in ParseInput()
extern FILE *fInp;

/* ***** GetEmail ******************************************************** */
/*
 * Reduce a "From:" header value to the text captured by sub-expression 2
 * of r_mail, in place.
 *
 *   auth ... NUL-terminated header value. It is rewritten through
 *            myCopy(auth, ..., MAX_AUTH), so the caller's buffer must be
 *            at least MAX_AUTH bytes.
 *
 * A trailing newline is always stripped. If r_mail does not match, nothing
 * else is changed. If it matches but group 2 did not take part in the
 * match, auth becomes the empty string.
 */
void GetEmail(char *auth) { // return only email address
  regmatch_t pm[MAX_REGMATCH];  // buffer for matching substrings
  char auth2[MAX_AUTH];         // temp copy, source and destination overlap
  size_t len;

  RemoveCR(auth);
  if (regexec(&r_mail, auth, MAX_REGMATCH, pm, 0)) return; // should not happen

  *auth2 = '\0';
  // regexec reports offsets of -1 for a group that did not participate;
  // never turn those into a pointer in front of the buffer
  if (pm[2].rm_so >= 0 && pm[2].rm_eo > pm[2].rm_so) {
    len = (size_t)(pm[2].rm_eo - pm[2].rm_so);
    if (len > (size_t)(MAX_AUTH - 1)) len = (size_t)(MAX_AUTH - 1); // keep inside auth2
    myCopy(auth2, auth + pm[2].rm_so, len + 1); // +1 leaves room for the '\0'
  }
  myCopy(auth, auth2, MAX_AUTH);
}

/* ***** RemoveCR ******************************************************** */
/*
 * Strip a line ending from a NUL-terminated string, in place.
 *
 *   text ... string to trim (must not be NULL)
 *
 * If the second-to-last character is '\n' the string is cut there (which
 * also drops the character after it); then a '\n' in the last position is
 * removed. Strings shorter than the inspected position are left alone, so
 * empty and one-character strings are safe.
 */
void RemoveCR(char *text) { // remove '\n' from end
  size_t len = strlen(text);

  if (len >= 2 && text[len-2]=='\n') {
    text[len-2]='\0';
    len-=2;
  }
  if (len >= 1 && text[len-1]=='\n') text[len-1]='\0';
}

/* ***** GetSubj ********************************************************* */
/*
 * Normalise a "Subject:" header value in place: strip the trailing newline,
 * MIME-decode it, then keep only the text captured by sub-expression 3 of
 * r_re (the subject without its leading "Re:").
 *
 *   subj ... NUL-terminated header value. It is rewritten through
 *            myCopy(subj, ..., MAX_SUBJ), so the caller's buffer must be
 *            at least MAX_SUBJ bytes.
 *
 * If r_re does not match, the decoded subject is left as it is. If it
 * matches but group 3 did not take part in the match, subj becomes empty.
 */
void GetSubj(char *subj) { // MIME-decode & remove "Re:" from begin of subj.
  regmatch_t pm[MAX_REGMATCH];  // buffer for matching substrings
  char sub2[MAX_SUBJ];          // temp copy, source and destination overlap
  size_t len;

  RemoveCR(subj);
  Decode_mime_string(subj, MAX_SUBJ);
  if (regexec(&r_re, subj, MAX_REGMATCH, pm, 0)) return; // should not happen

  *sub2 = '\0';
  // regexec reports offsets of -1 for a group that did not participate;
  // never turn those into a pointer in front of the buffer
  if (pm[3].rm_so >= 0 && pm[3].rm_eo > pm[3].rm_so) {
    len = (size_t)(pm[3].rm_eo - pm[3].rm_so);
    if (len > (size_t)(MAX_SUBJ - 1)) len = (size_t)(MAX_SUBJ - 1); // keep inside sub2
    myCopy(sub2, subj + pm[3].rm_so, len + 1); // +1 leaves room for the '\0'
  }
  myCopy(subj, sub2, MAX_SUBJ);
}

/* ***** GetMailer ******************************************************* */

/* How a matching mailer pattern turns its capture groups into the stored name */
enum mailerForm {
  MAILER_NAME,    // group 1 only
  MAILER_NAME_X,  // group 1 followed by 'x' (presumably a stand-in for the removed version)
  MAILER_IMS,     // group 1, a space, group 2, then 'x'
  MAILER_CLEAR    // drop the value completely
};

/* One entry of the ordered pattern list used by GetMailer() */
struct mailerRule {
  const regex_t *re;     // compiled pattern to try
  enum mailerForm form;  // how to build the result when it matches
};

/* Append capture group *m of src to dst at offset pos, never writing past
   dst[MAX_SUBJ-1]; a group that did not participate appends nothing.
   Returns the new end offset; dst stays NUL-terminated. */
static size_t mailerPutGroup(char *dst, size_t pos, char *src, const regmatch_t *m) {
  size_t len, room;

  if (m->rm_so < 0 || m->rm_eo <= m->rm_so) return pos; // unmatched or empty group
  if (pos >= (size_t)(MAX_SUBJ - 1)) return pos;         // buffer already full
  len  = (size_t)(m->rm_eo - m->rm_so);
  room = (size_t)(MAX_SUBJ - 1) - pos;
  if (len > room) len = room;
  myCopy(dst + pos, src + m->rm_so, len + 1); // +1 leaves room for the '\0'
  return pos + len;
}

/* Append one character to dst at offset pos if it still fits in MAX_SUBJ
   bytes (terminator included). Returns the new end offset. */
static size_t mailerPutChar(char *dst, size_t pos, char c) {
  if (pos + 1 < (size_t)MAX_SUBJ) {
    dst[pos++] = c;
    dst[pos] = '\0';
  }
  return pos;
}

/*
 * Remove version details from an X-Mailer/User-Agent/X-Newsreader (or
 * Message-ID) value, in place.
 *
 *   mail ... NUL-terminated header value. It is rewritten through
 *            myCopy(mail, ..., MAX_SUBJ), so the caller's buffer must be
 *            at least MAX_SUBJ bytes.
 *
 * Every pattern is tried against the original value, in the order listed
 * below, and each match replaces the result of earlier matches, so the
 * last matching rule wins. If nothing matches, only the trailing newline
 * is stripped.
 */
void GetMailer(char *mail) { // remove versions from X-Mailer/User-Agent/X-Newsreader line
  static const struct mailerRule rules[] = {
    { &r_m_bat, MAILER_NAME   }, // The Bat!
    { &r_m_ims, MAILER_IMS    }, // Internet Mail Service
    { &r_m_moz, MAILER_NAME_X }, // Mozilla
    { &r_m_ope, MAILER_NAME_X }, // Opera
    { &r_m_oue, MAILER_NAME_X }, // Outlook Express
    { &r_m_ouc, MAILER_NAME   }, // Outlook CWS
    { &r_m_oum, MAILER_NAME   }, // Outlook IMO
    { &r_m_out, MAILER_NAME   }, // Outlook
    { &r_m_lot, MAILER_NAME   }, // Lotus Notes
    { &r_m_cal, MAILER_NAME   }, // Calypso
    { &r_m_peg, MAILER_NAME   }, // Pegasus4win
    { &r_m_opw, MAILER_NAME   }, // Open Webmail
    { &r_m_eud, MAILER_NAME   }, // Eudora
    { &r_m_pos, MAILER_NAME   }, // POSTIE
    { &r_m_pob, MAILER_NAME   }, // POBOX
    { &r_m_kma, MAILER_NAME   }, // KMail
    { &r_m_mut, MAILER_NAME   }, // Mutt
    { &r_m_imp, MAILER_NAME   }, // IMP webmail
    { &r_m_syl, MAILER_NAME_X }, // Sylpheed
    { &r_m_pin, MAILER_NAME   }, // Pine
    // Pine work-around: remove Message-ID in case no mailer was found there
    // (Message-ID is used only if other headers are not present)
    { &r_m_pi2, MAILER_CLEAR  },
    // (these contributed by Urke MMI <urke@gmx.net>)
    { &r_m_pan, MAILER_NAME_X }, // Pan
    { &r_m_4td, MAILER_NAME_X }, // 40tude_Dialog
    { &r_m_fag, MAILER_NAME_X }, // Forte Agent
    { &r_m_mpg, MAILER_NAME_X }, // MicroPlanet Gravity
    { &r_m_xws, MAILER_NAME_X }, // Xnews
    { &r_m_knd, MAILER_NAME_X }, // KNode
    { &r_m_hst, MAILER_NAME_X }, // Hamster
    { &r_m_nnr, MAILER_NAME_X }  // Noworyta News Reader
  };
  regmatch_t pm[MAX_REGMATCH];  // buffer for matching substrings
  char sub2[MAX_SUBJ];          // temp for storing result, will be copied back to mail at the end
  size_t i, pos;

  RemoveCR(mail);
  myCopy(sub2, mail, MAX_SUBJ); // copy original there for case no regex will match

  for (i = 0; i < sizeof rules / sizeof rules[0]; i++) {
    if (regexec(rules[i].re, mail, MAX_REGMATCH, pm, 0)) continue; // no match

    // a match always rebuilds the result from scratch
    *sub2 = '\0';
    pos = 0;
    if (rules[i].form == MAILER_CLEAR) continue;

    pos = mailerPutGroup(sub2, pos, mail, &pm[1]);
    if (rules[i].form == MAILER_IMS) {
      pos = mailerPutChar(sub2, pos, ' ');
      pos = mailerPutGroup(sub2, pos, mail, &pm[2]);
    }
    if (rules[i].form != MAILER_NAME) mailerPutChar(sub2, pos, 'x');
  }

  myCopy(mail, sub2, MAX_SUBJ);
}

/* ***** PrintProgress *************************************************** */
/*
 * Report progress on stderr when verbose output is enabled.
 *
 * A line is printed only when the global message counter nSprav is a
 * multiple of the current step. The step starts at 10 and grows by a
 * factor of 10 while nSprav/step is still above 10, so reports get sparser
 * as the counter grows (10, 20, ..., 110, 200, 300, ...).
 */
void PrintProgress() { // print no. of messages processed
  long delim=10;  // current reporting step

  if (!verbose) return;
  while (nSprav/delim > 10) delim*=10;
  // nSprav is a (signed) long, so it has to be printed with %ld
  if (nSprav%delim == 0) fprintf(stderr,"(i) Processing message no.%5ld\n",nSprav);
}

/* ***** AddStat ********************************************************* */
void AddStat(char *A, char *S, char *D, char *M, long sQ, long sN) {
/* A ... Author (From: line)
   S ... Subject line
   D ... Date line
   M ... X-Mailer/User-Agent/X-Newsreader line
  sQ ... size of quoted text
  sN ... size of non-quoted text
*/
  regmatch_t pm[MAX_REGMATCH];  // buffer for matching substrings
  long sT; // total size of message body
  long day,mon,yea;  // day, month, year of curr. mess.
  time_t t_mess;     // date of current message (seconds)
  struct tm tm_mess; // date of curr. message
  char time[6];

  sT=sQ+sN; // sum of quoted + non-quoted
  // quote
  if ((float)sQ/sT > msgQ->perc) {
     myCopy(msgQ->auth, A, MAX_AUTH);
     myCopy(msgQ->subj, S, MAX_SUBJ);
     myCopy(msgQ->date, D, MAX_DATE);
     msgQ->size=sT;
     msgQ->perc=(float)sQ/sT;
  }
  // size
  if (sT > msgD->size) {
     myCopy(msgD->auth, A, MAX_AUTH);
     myCopy(msgD->subj, S, MAX_SUBJ);
     myCopy(msgD->date, D, MAX_DATE);
     msgD->size=sT;
  }
  zozA=AddEntry(zozA, A, sT, sQ, compCount); // author
  zozS=AddEntry(zozS, S, sT, sQ, compCount); // subject
  zozM=AddEntry(zozM, M, sT, sQ, compCount); // mailer
  // time
  if (!regexec(&r_time, D, MAX_REGMATCH, pm, 0)) { // if matched
     sT=pm[1].rm_eo - pm[1].rm_so + 1;
     myCopy(time, D + pm[1].rm_so, (sT>3)?3:sT); // it should not match more than 2 chars (+ \0)
     sT=atoi(time);
     if ((sT>=0) && (sT<24)) nHod[sT]++;
  }  else myVerb(" !  Invalid time: ",D);

  // day of week (DOW)
  if (!regexec(&r_dow, D, MAX_REGMATCH, pm, 0)) { // if matched
     sT=pm[2].rm_eo - pm[2].rm_so + 1;
     myCopy(time, D + pm[2].rm_so, (sT>4)?4:sT); // it should not match more than 3 chars (+ \0)
     sT=9; // default (invalid) value
     if (!strcmp(time, "Mon")) sT=0;
     if (!strcmp(time, "Tue")) sT=1;
     if (!strcmp(time, "Wed")) sT=2;
     if (!strcmp(time, "Thu")) sT=3;
     if (!strcmp(time, "Fri")) sT=4;
     if (!strcmp(time, "Sat")) sT=5;
     if (!strcmp(time, "Sun")) sT=6;
     if ((sT>=0) && (sT<7)) nDOW[sT]++;
  }  else myVerb(" !  Invalid day of week: ",D);

  // date
  day=mon=yea=0;
  if (!regexec(&r_date, D, MAX_REGMATCH, pm, 0)) { // if matched
     sT=pm[2].rm_eo - pm[2].rm_so + 1;
     if (sT > 0) {
        myCopy(time, D + pm[2].rm_so, (sT>3)?3:sT); // it should not match more than 2 chars (+ \0)
        sT=atoi(time)-1; // days in month begin from 1 (but our array from 0)
        if ((sT>=0) && (sT<31)) nDen[sT]++;
     }
     // oldest/newest message - match whole date, convert it to seconds
     day=sT+1;
     sT=pm[3].rm_eo - pm[3].rm_so + 1; // month in text form
     if (sT > 0) {
        myCopy(time, D + pm[3].rm_so, (sT>4)?4:sT); // it should not match more than 3 chars (+ \0)
        if (!strncmp(time,"Jan",3)) mon=1;
        if (!strncmp(time,"Feb",3)) mon=2;
        if (!strncmp(time,"Mar",3)) mon=3;
        if (!strncmp(time,"Apr",3)) mon=4;
        if (!strncmp(time,"May",3)) mon=5;
        if (!strncmp(time,"Jun",3)) mon=6;
        if (!strncmp(time,"Jul",3)) mon=7;
        if (!strncmp(time,"Aug",3)) mon=8;
        if (!strncmp(time,"Sep",3)) mon=9;
        if (!strncmp(time,"Oct",3)) mon=10;
        if (!strncmp(time,"Nov",3)) mon=11;
        if (!strncmp(time,"Dec",3)) mon=12;
     }
     sT=pm[4].rm_eo - pm[4].rm_so + 1; // year
     if (sT > 0) {
        myCopy(time, D + pm[4].rm_so, (sT>5)?5:sT); // it should not match more than 4 chars (+ \0)
        yea=atoi(time);
     }
     if (!day || !mon || !yea) myVerb(" !  Unable to get message date for oldest/newest: ",D);
     tm_mess.tm_sec=tm_mess.tm_min=0;
     tm_mess.tm_hour=12; // just to be sure that timezone won't change the day
     tm_mess.tm_isdst=-1; // unknown timezone
     tm_mess.tm_mday=day;
     tm_mess.tm_mon =mon-1;
     tm_mess.tm_year=yea-1900;
     t_mess=mktime(&tm_mess); // convert it to seconds since epoch
     if (mon) nMon[mon-1]++; else myVerb(" !  Invalid month: ",D);
     if (strncmp("DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA",S,49)) {
       if (!t_oldest) t_oldest=t_mess; // initialize oldest message to current
       if (t_mess < t_oldest) t_oldest=t_mess;
       if (t_mess > t_newest) t_newest=t_mess;
     }
  }  else myVerb(" !  Invalid date: ",D);

}

/* ***** myCopy ********************************************************** */
/*
 * Bounded string copy that always terminates the destination.
 *
 *   dst ... destination buffer of at least max bytes
 *   src ... NUL-terminated source string
 *   max ... size of the destination region, including the terminator
 *
 * At most max-1 characters of src are kept; as with strncpy, the rest of
 * the region is zero-filled when src is shorter. With max == 0 there is no
 * room for anything, so dst is left untouched.
 */
void myCopy(char *dst, char *src, size_t max) {
  // safe copy - will add \0 to end of destination region
  if (max == 0) return; // dst[max-1] would otherwise land in front of dst
  strncpy(dst,src,max);
  dst[max-1]='\0';
}

/* ***** ParseInput ****************************************************** */
/*
 * Read the whole mailbox from fInp and feed every message to AddStat().
 *
 * The input is processed line by line with a small state machine:
 *   a_empt ... the previous line was empty (a lone "\n"); also set at start
 *   a_head ... currently inside the headers of a message
 *   a_body ... currently inside the body of a message
 * A line matching r_from that follows an empty line (or starts the file)
 * begins a new message; the first empty line after the headers starts the
 * body. Only the first MAX_LINE-1 characters of a line are examined, but
 * its full length is counted. Body lines with '>' among their first 9
 * characters count as quoted text.
 */
void ParseInput() { // parse whole input file
  long i;
  char string[MAX_LINE];  // temp. for reading lines from input
  char riadok[MAX_LINE];  // 1st part of line (to be processed)
  int a_empt=1;      // finite state automat (empty,body,header)
  int a_head=0;
  int a_body=0;
  int nDlzRiad=0;    // size of current line
  int nDlzQuot=0;    // size of quoted lines of curr. email
  int nDlzNorm=0;    // size of non-quoted lines of curr. email
  char sAuth[MAX_AUTH]=""; // From: header line
  char sSubj[MAX_SUBJ]=""; // Subject: header line
  char sDate[MAX_DATE]=""; // Date: header line
  char sMail[MAX_SUBJ]=""; // X-Mailer/User-Agent/X-Newsreader/Message-ID header line

  while (!feof(fInp) && fgets(string,MAX_LINE,fInp)) {
    // read beginning of line
    nDlzRiad=i=strlen(string);
    strcpy(riadok,string); // both are < MAX_LINE chars, so it's safe
    while (!feof(fInp) && i>1 && string[i-1]!='\n') { // read up to the end of line
      // stop on EOF or read error: string holds nothing new to count and
      // a persistent error would otherwise keep this loop spinning
      if (!fgets(string,MAX_LINE,fInp)) break;
      i=strlen(string);
      nDlzRiad+=i;
    } // end of line

    // parse line & change automat state
    if (a_empt && a_head) { a_head=0; a_body=1; a_empt=0; }
    if (a_empt) { // begin of file || end of body
      // find "From "
      if (regexec(&r_from, riadok, 0, NULL, 0)==0) {
        if (a_body) AddStat(sAuth,sSubj,sDate,sMail,nDlzQuot,nDlzNorm);
        // we're at beginning of new message
        a_head=1;
        a_body=0;
        nSprav++;
        PrintProgress();
        nDlzQuot=nDlzNorm=0;
        *sAuth='\0'; *sSubj='\0'; *sDate='\0'; *sMail='\0';
      }
    }
    if (nDlzRiad==1) a_empt=1; else a_empt=0;

    // parse body
    if (a_body) {
      nDlzka+=nDlzRiad;
      riadok[9]='\0'; // search only in first 9 chars
      if (strstr(riadok,">")) nDlzQuot+=nDlzRiad;
      else                    nDlzNorm+=nDlzRiad;
    }

    // parse header
    if (a_head) {
      if (!strncasecmp(riadok,"Subject: ",9)) {
        myCopy(sSubj,riadok+9,MAX_SUBJ);
        GetSubj(sSubj);
      }
      if (!strncasecmp(riadok,"Date: ",6)) {
        myCopy(sDate,riadok+6,MAX_DATE);
        RemoveCR(sDate);
      }
      if (!strncasecmp(riadok,"From: ",6)) {
        myCopy(sAuth,riadok+6,MAX_AUTH);
        GetEmail(sAuth);
      }
      // will search for mailers in these
      // Message-ID will be used only if no other of these is present
      // (test the first character: comparing the array itself with '\0'
      //  is a pointer comparison that can never be true)
      if (!strncasecmp(riadok,"Message-ID: ",12) && *sMail=='\0') { // for Pine mailer
        myCopy(sMail,riadok+12,MAX_SUBJ);
        GetMailer(sMail);
      }
      if (!strncasecmp(riadok,"X-Newsreader: ",14)) {
        myCopy(sMail,riadok+14,MAX_SUBJ);
        GetMailer(sMail);
      }
      if (!strncasecmp(riadok,"User-Agent: ",12)) {
        myCopy(sMail,riadok+12,MAX_SUBJ);
        GetMailer(sMail);
      }
      if (!strncasecmp(riadok,"X-Mailer: ",10)) {
        myCopy(sMail,riadok+10,MAX_SUBJ);
        GetMailer(sMail);
      }
    }
  } /* while fEOF */
  time(&t);
  // compute stats of last email if needed
  if (a_body) AddStat(sAuth,sSubj,sDate,sMail,nDlzQuot,nDlzNorm);
}

Generated by  Doxygen 1.6.0   Back to index