about summary refs log tree commit diff
path: root/pkg
diff options
context:
space:
mode:
author Max Magorsch <arzano@gentoo.org> 2020-06-22 19:32:48 +0200
committer Max Magorsch <arzano@gentoo.org> 2020-06-22 19:32:48 +0200
commit 0f5a1f528fe4f5453f315564b448cfb1f9fea711 (patch)
tree 32d8e114de3681499bac8eaba3212be5ba81ceaf /pkg
parent Rework the data model to improve the performance (diff)
download archives-0f5a1f528fe4f5453f315564b448cfb1f9fea711.tar.gz
archives-0f5a1f528fe4f5453f315564b448cfb1f9fea711.tar.bz2
archives-0f5a1f528fe4f5453f315564b448cfb1f9fea711.zip
Improve the performance of the importer
Signed-off-by: Max Magorsch <arzano@gentoo.org>
Diffstat (limited to 'pkg')
-rw-r--r-- pkg/importer/importer.go 80
-rw-r--r-- pkg/importer/utils.go 91
2 files changed, 131 insertions, 40 deletions
diff --git a/pkg/importer/importer.go b/pkg/importer/importer.go
index cdb278d..76ba8e7 100644
--- a/pkg/importer/importer.go
+++ b/pkg/importer/importer.go
@@ -2,14 +2,88 @@ package importer
import (
"archives/pkg/config"
+ "archives/pkg/database"
+ "archives/pkg/models"
"fmt"
+ "os"
"path/filepath"
+ "strconv"
+ "strings"
+ "time"
)
+var overAllcounter int
+var importedCounter int
+var startTime time.Time
+
+
func FullImport() {
+
+ fmt.Println("Init import...")
+ filepath.Walk(config.MailDirPath(), initImport)
+
+ overAllcounter = 0
+ importedCounter = 0
+ startTime = time.Now()
+ filepath.Walk(config.MailDirPath(), func(path string, info os.FileInfo, err error) error {
+ if overAllcounter % 1000 == 0 {
+ fmt.Println(strconv.Itoa(overAllcounter) + ": " + time.Now().Sub(startTime).String())
+ }
+ overAllcounter++
+ if err != nil {
+ return err
+ }
+ if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) {
+ importedCounter++
+ importMail(path, info.Name())
+ }
+ return nil
+ })
+
+ fmt.Println("Finished full import. Imported " + strconv.Itoa(importedCounter) + " messages.")
+}
+
+func IncrementalImport() {
+ var messages []*models.Message
+ err := database.DBCon.Model(&messages).
+ Column("filename").
+ Select()
+
+ if err != nil {
+ fmt.Println("Problem during import, aborting:")
+ fmt.Println(err)
+ return
+ }
+
fmt.Println("Init import...")
filepath.Walk(config.MailDirPath(), initImport)
- fmt.Println("Start import...")
- filepath.Walk(config.MailDirPath(), importMail)
- fmt.Println("Finished import.")
+
+ overAllcounter = 0
+ importedCounter = 0
+ startTime = time.Now()
+ filepath.Walk(config.MailDirPath(), func(path string, info os.FileInfo, err error) error {
+ if overAllcounter % 1000 == 0 {
+ fmt.Println(strconv.Itoa(overAllcounter) + ": " + time.Now().Sub(startTime).String())
+ }
+ overAllcounter++
+ if err != nil {
+ return err
+ }
+ if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) && !fileIsAlreadyPresent(path, messages) {
+ importedCounter++
+ importMail(path, info.Name())
+ }
+ return nil
+ })
+
+ fmt.Println("Finished incremental import. Imported " + strconv.Itoa(importedCounter) + " new messages.")
}
+
+func fileIsAlreadyPresent(path string, messages []*models.Message) bool {
+ for _, message := range messages {
+ if strings.Contains(strings.TrimRight(path, ",S"), strings.TrimRight(message.Filename, ",S")){
+ return true
+ }
+ }
+ return false
+} \ No newline at end of file
diff --git a/pkg/importer/utils.go b/pkg/importer/utils.go
index 8383ad0..de5b27c 100644
--- a/pkg/importer/utils.go
+++ b/pkg/importer/utils.go
@@ -4,9 +4,11 @@ import (
"archives/pkg/config"
"archives/pkg/database"
"archives/pkg/models"
+ "bytes"
"fmt"
"io"
"io/ioutil"
+ "log"
"mime/multipart"
"net/mail"
"os"
@@ -31,8 +33,16 @@ func initImport(path string, info os.FileInfo, err error) error {
}
if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) {
- file, _ := os.Open(path)
- m, _ := mail.ReadMessage(file)
+ file, err := os.Open(path)
+ defer file.Close()
+ if err != nil {
+ return err
+ }
+
+ m, err := mail.ReadMessage(file)
+ if err != nil {
+ return err
+ }
mails = append(mails, &models.Message{
Id: m.Header.Get("X-Archives-Hash"),
@@ -47,49 +57,56 @@ func initImport(path string, info os.FileInfo, err error) error {
}
// TODO
-func importMail(path string, info os.FileInfo, err error) error {
+func importMail(path, filename string) error {
+ content, err := ioutil.ReadFile(path)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ r := bytes.NewReader(content)
+ m, err := mail.ReadMessage(r)
if err != nil {
return err
}
- if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) {
- file, _ := os.Open(path)
- m, _ := mail.ReadMessage(file)
-
- msg := models.Message{
- Id: m.Header.Get("X-Archives-Hash"),
- MessageId: m.Header.Get("Message-Id"),
- Filename: info.Name(),
- From: m.Header.Get("From"),
- To: strings.Split(m.Header.Get("To"), ","),
- Cc: strings.Split(m.Header.Get("Cc"), ","),
- Subject: m.Header.Get("Subject"),
-
- List: getListName(path),
-
- // TODO
- Date: getDate(m.Header),
- InReplyToId: getInReplyToMail(m.Header.Get("In-Reply-To"), m.Header.Get("From")),
- //References: getReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("From")),
- Body: getBody(m.Header, m.Body),
- Attachments: getAttachments(m.Header, m.Body),
-
- StartsThread: m.Header.Get("In-Reply-To") == "" && m.Header.Get("References") == "",
-
- Comment: "",
- Hidden: false,
- }
- err := insertMessage(msg)
+ go importIntoDatabase(path, filename, m)
- if err != nil {
- fmt.Println("Error during importing Mail")
- fmt.Println(err)
- }
+ return nil
+}
- insertReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("X-Archives-Hash"), m.Header.Get("From"))
+func importIntoDatabase(path, filename string, m *mail.Message) {
+ msg := models.Message{
+ Id: m.Header.Get("X-Archives-Hash"),
+ MessageId: m.Header.Get("Message-Id"),
+ Filename: filename,
+ From: m.Header.Get("From"),
+ To: strings.Split(m.Header.Get("To"), ","),
+ Cc: strings.Split(m.Header.Get("Cc"), ","),
+ Subject: m.Header.Get("Subject"),
+
+ List: getListName(path),
+
+ // TODO
+ Date: getDate(m.Header),
+ InReplyToId: getInReplyToMail(m.Header.Get("In-Reply-To"), m.Header.Get("From")),
+ //References: getReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("From")),
+ Body: getBody(m.Header, m.Body),
+ Attachments: getAttachments(m.Header, m.Body),
+
+ StartsThread: m.Header.Get("In-Reply-To") == "" && m.Header.Get("References") == "",
+
+ Comment: "",
+ Hidden: false,
+ }
+ err := insertMessage(msg)
+ if err != nil {
+ fmt.Println("Error during importing Mail")
+ fmt.Println(err)
}
- return nil
+
+ insertReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("X-Archives-Hash"), m.Header.Get("From"))
+
}
func getInReplyToMail(messageId, from string) string {