From 0f5a1f528fe4f5453f315564b448cfb1f9fea711 Mon Sep 17 00:00:00 2001 From: Max Magorsch Date: Mon, 22 Jun 2020 19:32:48 +0200 Subject: Improve the performance of the importer Signed-off-by: Max Magorsch --- pkg/importer/importer.go | 80 ++++++++++++++++++++++++++++++++++++++++-- pkg/importer/utils.go | 91 ++++++++++++++++++++++++++++-------------------- 2 files changed, 131 insertions(+), 40 deletions(-) (limited to 'pkg/importer') diff --git a/pkg/importer/importer.go b/pkg/importer/importer.go index cdb278d..76ba8e7 100644 --- a/pkg/importer/importer.go +++ b/pkg/importer/importer.go @@ -2,14 +2,88 @@ package importer import ( "archives/pkg/config" + "archives/pkg/database" + "archives/pkg/models" "fmt" + "os" "path/filepath" + "strconv" + "strings" + "time" ) +var overAllcounter int +var importedCounter int +var startTime time.Time + + func FullImport() { + + fmt.Println("Init import...") + filepath.Walk(config.MailDirPath(), initImport) + + overAllcounter = 0 + importedCounter = 0 + startTime = time.Now() + filepath.Walk(config.MailDirPath(), func(path string, info os.FileInfo, err error) error { + if overAllcounter % 1000 == 0 { + fmt.Println(strconv.Itoa(overAllcounter) + ": " + time.Now().Sub(startTime).String()) + } + overAllcounter++ + if err != nil { + return err + } + if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) { + importedCounter++ + importMail(path, info.Name()) + } + return nil + }) + + fmt.Println("Finished full import. Imported " + strconv.Itoa(importedCounter) + " messages.") +} + +func IncrementalImport() { + var messages []*models.Message + err := database.DBCon.Model(&messages). + Column("filename"). + Select() + + if err != nil { + fmt.Println("Problem during import, aborting:") + fmt.Println(err) + return + } + fmt.Println("Init import...") filepath.Walk(config.MailDirPath(), initImport) - fmt.Println("Start import...") - filepath.Walk(config.MailDirPath(), importMail) - fmt.Println("Finished import.") + + overAllcounter = 0 + importedCounter = 0 + startTime = time.Now() + filepath.Walk(config.MailDirPath(), func(path string, info os.FileInfo, err error) error { + if overAllcounter % 1000 == 0 { + fmt.Println(strconv.Itoa(overAllcounter) + ": " + time.Now().Sub(startTime).String()) + } + overAllcounter++ + if err != nil { + return err + } + if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) && !fileIsAlreadyPresent(path, messages) { + importedCounter++ + importMail(path, info.Name()) + } + return nil + }) + + fmt.Println("Finished incremental import. Imported " + strconv.Itoa(importedCounter) + " new messages.") } + +func fileIsAlreadyPresent(path string, messages []*models.Message) bool { + for _, message := range messages { + if strings.Contains(strings.TrimRight(path, ",S"), strings.TrimRight(message.Filename, ",S")){ + return true + } + } + return false +} \ No newline at end of file diff --git a/pkg/importer/utils.go b/pkg/importer/utils.go index 8383ad0..de5b27c 100644 --- a/pkg/importer/utils.go +++ b/pkg/importer/utils.go @@ -4,9 +4,11 @@ import ( "archives/pkg/config" "archives/pkg/database" "archives/pkg/models" + "bytes" "fmt" "io" "io/ioutil" + "log" "mime/multipart" "net/mail" "os" @@ -31,8 +33,16 @@ func initImport(path string, info os.FileInfo, err error) error { } if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) { - file, _ := os.Open(path) - m, _ := mail.ReadMessage(file) + file, err := os.Open(path) + defer file.Close() + if err != nil { + return err + } + + m, err := mail.ReadMessage(file) + if err != nil { + return err + } mails = append(mails, &models.Message{ Id: m.Header.Get("X-Archives-Hash"), @@ -47,49 +57,56 @@ func initImport(path string, info os.FileInfo, err error) error { } // TODO -func importMail(path string, info os.FileInfo, err error) error { +func importMail(path, filename string) error { + content, err := ioutil.ReadFile(path) + if err != nil { + log.Fatal(err) + } + + r := bytes.NewReader(content) + m, err := mail.ReadMessage(r) if err != nil { return err } - if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) { - file, _ := os.Open(path) - m, _ := mail.ReadMessage(file) - - msg := models.Message{ - Id: m.Header.Get("X-Archives-Hash"), - MessageId: m.Header.Get("Message-Id"), - Filename: info.Name(), - From: m.Header.Get("From"), - To: strings.Split(m.Header.Get("To"), ","), - Cc: strings.Split(m.Header.Get("Cc"), ","), - Subject: m.Header.Get("Subject"), - - List: getListName(path), - - // TODO - Date: getDate(m.Header), - InReplyToId: getInReplyToMail(m.Header.Get("In-Reply-To"), m.Header.Get("From")), - //References: getReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("From")), - Body: getBody(m.Header, m.Body), - Attachments: getAttachments(m.Header, m.Body), - - StartsThread: m.Header.Get("In-Reply-To") == "" && m.Header.Get("References") == "", - - Comment: "", - Hidden: false, - } - err := insertMessage(msg) + go importIntoDatabase(path, filename, m) - if err != nil { - fmt.Println("Error during importing Mail") - fmt.Println(err) - } + return nil +} - insertReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("X-Archives-Hash"), m.Header.Get("From")) +func importIntoDatabase(path, filename string, m *mail.Message) { + msg := models.Message{ + Id: m.Header.Get("X-Archives-Hash"), + MessageId: m.Header.Get("Message-Id"), + Filename: filename, + From: m.Header.Get("From"), + To: strings.Split(m.Header.Get("To"), ","), + Cc: strings.Split(m.Header.Get("Cc"), ","), + Subject: m.Header.Get("Subject"), + + List: getListName(path), + + // TODO + Date: getDate(m.Header), + InReplyToId: getInReplyToMail(m.Header.Get("In-Reply-To"), m.Header.Get("From")), + //References: getReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("From")), + Body: getBody(m.Header, m.Body), + Attachments: getAttachments(m.Header, m.Body), + + StartsThread: m.Header.Get("In-Reply-To") == "" && m.Header.Get("References") == "", + + Comment: "", + Hidden: false, + } + err := insertMessage(msg) + if err != nil { + fmt.Println("Error during importing Mail") + fmt.Println(err) } - return nil + + insertReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("X-Archives-Hash"), m.Header.Get("From")) + } func getInReplyToMail(messageId, from string) string { -- cgit v1.2.3-18-g5258