aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Max Magorsch <arzano@gentoo.org> 2020-06-22 19:32:48 +0200
committer Max Magorsch <arzano@gentoo.org> 2020-06-22 19:32:48 +0200
commit 0f5a1f528fe4f5453f315564b448cfb1f9fea711 (patch)
tree 32d8e114de3681499bac8eaba3212be5ba81ceaf
parent Rework the data model to improve the performance (diff)
download archives-0f5a1f528fe4f5453f315564b448cfb1f9fea711.tar.gz
archives-0f5a1f528fe4f5453f315564b448cfb1f9fea711.tar.bz2
archives-0f5a1f528fe4f5453f315564b448cfb1f9fea711.zip
Improve the performance of the importer
Signed-off-by: Max Magorsch <arzano@gentoo.org>
-rw-r--r-- archives.go 8
-rw-r--r-- docker-compose.develop.yml 3
-rw-r--r-- docker-compose.yml 2
-rw-r--r-- pkg/importer/importer.go 80
-rw-r--r-- pkg/importer/utils.go 91
5 files changed, 141 insertions, 43 deletions
diff --git a/archives.go b/archives.go
index b6b2465..dd517ed 100644
--- a/archives.go
+++ b/archives.go
@@ -21,14 +21,20 @@ func main() {
// main part
- fullImport := flag.Bool("fullimport", false, "Start a full import, importing all mails")
+ fullImport := flag.Bool("full-import", false, "Start a full import, importing all mails")
+ incrementalImport := flag.Bool("incremental-import", false, "Start a incremental import, importing only new mails")
serve := flag.Bool("serve", false, "Start serving the web application")
+
flag.Parse()
if *fullImport {
importer.FullImport()
}
+ if *incrementalImport {
+ importer.IncrementalImport()
+ }
+
if *serve {
app.Serve()
}
diff --git a/docker-compose.develop.yml b/docker-compose.develop.yml
index 0c24a04..7acdd90 100644
--- a/docker-compose.develop.yml
+++ b/docker-compose.develop.yml
@@ -13,7 +13,8 @@ services:
LANG: en_US.UTF-8
LANGUAGE: en_US:en
LC_ALL: en_US.UTF-8
- shm_size: 512mb
+ shm_size: 1024mb
+ command: postgres -c 'shared_buffers=512MB' -c 'max_connections=1000'
volumes:
- pgdata:/var/lib/postgresql/data
pgadmin:
diff --git a/docker-compose.yml b/docker-compose.yml
index f95201e..a606797 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -31,7 +31,7 @@ services:
com.centurylinklabs.watchtower.enable: "true"
environment:
ARCHIVES_LOG_FILE: '/var/log/archives/updater.log'
- command: '--fullimport'
+ command: '--full-import'
depends_on:
- db
db:
diff --git a/pkg/importer/importer.go b/pkg/importer/importer.go
index cdb278d..76ba8e7 100644
--- a/pkg/importer/importer.go
+++ b/pkg/importer/importer.go
@@ -2,14 +2,88 @@ package importer
import (
"archives/pkg/config"
+ "archives/pkg/database"
+ "archives/pkg/models"
"fmt"
+ "os"
"path/filepath"
+ "strconv"
+ "strings"
+ "time"
)
+// overAllcounter counts every path visited by the current import walk
+// (directories and skipped files included); it drives the progress output.
+var overAllcounter int
+// importedCounter counts the files handed to importMail during the current run.
+var importedCounter int
+// startTime is set right before the import walk starts; the progress output
+// prints the time elapsed since then.
+var startTime time.Time
+
+
+// FullImport imports every mail below config.MailDirPath() that belongs to a
+// public list.
+//
+// The mail directory is walked twice: first with initImport, then a second
+// time handing every matching file to importMail. Every 1000 visited paths the
+// number of visited paths and the elapsed time are printed, and a summary is
+// printed at the end. Errors returned by either walk are printed instead of
+// being dropped.
func FullImport() {
+
+	fmt.Println("Init import...")
+	if err := filepath.Walk(config.MailDirPath(), initImport); err != nil {
+		// initImport returns an error for a file it cannot open or parse, which
+		// stops the walk early; report that instead of continuing silently.
+		fmt.Println("Problem during init import:")
+		fmt.Println(err)
+	}
+
+	overAllcounter = 0
+	importedCounter = 0
+	startTime = time.Now()
+	err := filepath.Walk(config.MailDirPath(), func(path string, info os.FileInfo, err error) error {
+		if overAllcounter%1000 == 0 {
+			fmt.Println(strconv.Itoa(overAllcounter) + ": " + time.Since(startTime).String())
+		}
+		overAllcounter++
+		if err != nil {
+			return err
+		}
+		if info.IsDir() || getDepth(path, config.MailDirPath()) < 1 || !isPublicList(path) {
+			return nil
+		}
+		// Count only mails importMail accepted. Its error is deliberately not
+		// returned here: one bad file must not abort the walk for all others.
+		if importErr := importMail(path, info.Name()); importErr == nil {
+			importedCounter++
+		}
+		return nil
+	})
+	if err != nil {
+		fmt.Println("Problem during import:")
+		fmt.Println(err)
+	}
+
+	fmt.Println("Finished full import. Imported " + strconv.Itoa(importedCounter) + " messages.")
+}
+
+// IncrementalImport imports only those public-list mails below
+// config.MailDirPath() that fileIsAlreadyPresent does not find among the
+// filenames already stored in the database.
+//
+// If the stored filenames cannot be loaded the import is aborted. Otherwise the
+// mail directory is walked twice (initImport, then importMail for every new
+// file), progress is printed every 1000 visited paths, and a summary is
+// printed at the end. Errors returned by either walk are printed instead of
+// being dropped.
+func IncrementalImport() {
+	// Only the filename column is selected; it is all fileIsAlreadyPresent reads.
+	var messages []*models.Message
+	err := database.DBCon.Model(&messages).
+		Column("filename").
+		Select()
+
+	if err != nil {
+		fmt.Println("Problem during import, aborting:")
+		fmt.Println(err)
+		return
+	}
+
fmt.Println("Init import...")
- filepath.Walk(config.MailDirPath(), initImport)
- fmt.Println("Start import...")
- filepath.Walk(config.MailDirPath(), importMail)
- fmt.Println("Finished import.")
+	if err := filepath.Walk(config.MailDirPath(), initImport); err != nil {
+		// initImport returns an error for a file it cannot open or parse, which
+		// stops the walk early; report that instead of continuing silently.
+		fmt.Println("Problem during init import:")
+		fmt.Println(err)
+	}
+
+	overAllcounter = 0
+	importedCounter = 0
+	startTime = time.Now()
+	err = filepath.Walk(config.MailDirPath(), func(path string, info os.FileInfo, err error) error {
+		if overAllcounter%1000 == 0 {
+			fmt.Println(strconv.Itoa(overAllcounter) + ": " + time.Since(startTime).String())
+		}
+		overAllcounter++
+		if err != nil {
+			return err
+		}
+		if info.IsDir() || getDepth(path, config.MailDirPath()) < 1 || !isPublicList(path) || fileIsAlreadyPresent(path, messages) {
+			return nil
+		}
+		// Count only mails importMail accepted. Its error is deliberately not
+		// returned here: one bad file must not abort the walk for all others.
+		if importErr := importMail(path, info.Name()); importErr == nil {
+			importedCounter++
+		}
+		return nil
+	})
+	if err != nil {
+		fmt.Println("Problem during import:")
+		fmt.Println(err)
+	}
+
+	fmt.Println("Finished incremental import. Imported " + strconv.Itoa(importedCounter) + " new messages.")
}
+
+// fileIsAlreadyPresent reports whether path matches the Filename of any entry
+// in messages.
+//
+// Trailing ',' and 'S' characters are stripped from both sides before the
+// comparison (strings.TrimRight treats ",S" as a set of characters, not as a
+// suffix), and the match is a substring test of the stored filename against
+// the whole path. Nil entries and filenames that are empty after trimming are
+// ignored.
+// NOTE(review): presumably the trimming is meant to ignore a trailing maildir
+// ",S" flag — confirm.
+func fileIsAlreadyPresent(path string, messages []*models.Message) bool {
+	// The path side of the comparison is the same for every message; trim once.
+	trimmedPath := strings.TrimRight(path, ",S")
+	for _, message := range messages {
+		if message == nil {
+			continue
+		}
+		known := strings.TrimRight(message.Filename, ",S")
+		// strings.Contains(s, "") is always true, so a row with an empty
+		// filename would otherwise mark every file as already imported.
+		if known == "" {
+			continue
+		}
+		if strings.Contains(trimmedPath, known) {
+			return true
+		}
+	}
+	return false
+}
\ No newline at end of file
diff --git a/pkg/importer/utils.go b/pkg/importer/utils.go
index 8383ad0..de5b27c 100644
--- a/pkg/importer/utils.go
+++ b/pkg/importer/utils.go
@@ -4,9 +4,11 @@ import (
"archives/pkg/config"
"archives/pkg/database"
"archives/pkg/models"
+ "bytes"
"fmt"
"io"
"io/ioutil"
+ "log"
"mime/multipart"
"net/mail"
"os"
@@ -31,8 +33,16 @@ func initImport(path string, info os.FileInfo, err error) error {
}
if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) {
- file, _ := os.Open(path)
- m, _ := mail.ReadMessage(file)
+ file, err := os.Open(path)
+ defer file.Close()
+ if err != nil {
+ return err
+ }
+
+ m, err := mail.ReadMessage(file)
+ if err != nil {
+ return err
+ }
mails = append(mails, &models.Message{
Id: m.Header.Get("X-Archives-Hash"),
@@ -47,49 +57,56 @@ func initImport(path string, info os.FileInfo, err error) error {
}
// TODO
-func importMail(path string, info os.FileInfo, err error) error {
+// importMail reads the file at path, parses it as a mail and starts a
+// goroutine (importIntoDatabase) that stores it under the given filename.
+//
+// It returns an error when the file cannot be read or cannot be parsed as a
+// mail. A nil result only means the database import was started, not that it
+// succeeded.
+//
+// NOTE(review): the goroutine started here is not waited for in this function
+// and the number of goroutines is not limited — confirm that callers keep the
+// process alive until all imports have finished.
+func importMail(path, filename string) error {
+	content, err := ioutil.ReadFile(path)
+	if err != nil {
+		// A single unreadable file must not terminate the whole import run
+		// (log.Fatal would exit the process); log it and let the caller go on.
+		log.Printf("importer: cannot read %s: %v", path, err)
+		return err
+	}
+
+	// Parse from the in-memory copy; the message body handed to the goroutine
+	// is backed by content, not by an open file.
+	r := bytes.NewReader(content)
+	m, err := mail.ReadMessage(r)
if err != nil {
return err
}
- if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) {
- file, _ := os.Open(path)
- m, _ := mail.ReadMessage(file)
-
- msg := models.Message{
- Id: m.Header.Get("X-Archives-Hash"),
- MessageId: m.Header.Get("Message-Id"),
- Filename: info.Name(),
- From: m.Header.Get("From"),
- To: strings.Split(m.Header.Get("To"), ","),
- Cc: strings.Split(m.Header.Get("Cc"), ","),
- Subject: m.Header.Get("Subject"),
-
- List: getListName(path),
-
- // TODO
- Date: getDate(m.Header),
- InReplyToId: getInReplyToMail(m.Header.Get("In-Reply-To"), m.Header.Get("From")),
- //References: getReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("From")),
- Body: getBody(m.Header, m.Body),
- Attachments: getAttachments(m.Header, m.Body),
-
- StartsThread: m.Header.Get("In-Reply-To") == "" && m.Header.Get("References") == "",
-
- Comment: "",
- Hidden: false,
- }
- err := insertMessage(msg)
+	go importIntoDatabase(path, filename, m)
- if err != nil {
- fmt.Println("Error during importing Mail")
- fmt.Println(err)
- }
+	return nil
+}
- insertReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("X-Archives-Hash"), m.Header.Get("From"))
+// importIntoDatabase builds a models.Message from the parsed mail m, stores it
+// via insertMessage and then passes the comma-split References header to
+// insertReferencesToMail.
+//
+// path is used to derive the list name (getListName); filename is stored as
+// the message's Filename. An insertMessage error is only printed: nothing is
+// returned, and insertReferencesToMail is still called afterwards.
+//
+// NOTE(review): RFC 5322 separates the message-ids of a References header by
+// whitespace, not commas — confirm insertReferencesToMail copes with a single
+// unsplit element.
+func importIntoDatabase(path, filename string, m *mail.Message) {
+ msg := models.Message{
+ Id: m.Header.Get("X-Archives-Hash"),
+ MessageId: m.Header.Get("Message-Id"),
+ Filename: filename,
+ From: m.Header.Get("From"),
+ To: strings.Split(m.Header.Get("To"), ","),
+ Cc: strings.Split(m.Header.Get("Cc"), ","),
+ Subject: m.Header.Get("Subject"),
+
+ List: getListName(path),
+
+ // TODO
+ Date: getDate(m.Header),
+ InReplyToId: getInReplyToMail(m.Header.Get("In-Reply-To"), m.Header.Get("From")),
+ //References: getReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("From")),
+ Body: getBody(m.Header, m.Body),
+ Attachments: getAttachments(m.Header, m.Body),
+
+ // A mail with neither an In-Reply-To nor a References header starts a thread.
+ StartsThread: m.Header.Get("In-Reply-To") == "" && m.Header.Get("References") == "",
+
+ Comment: "",
+ Hidden: false,
+ }
+ err := insertMessage(msg)
+ if err != nil {
+ fmt.Println("Error during importing Mail")
+ fmt.Println(err)
}
- return nil
+
+ insertReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("X-Archives-Hash"), m.Header.Get("From"))
+
}
func getInReplyToMail(messageId, from string) string {