makiuchi-d · gijsbotje · Dec 25, 2025 · Dec 29, 2025 · Dec 29, 2025 · Dec 30, 2025
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -12,15 +12,15 @@ jobs:
   build:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v6
 
     - name: Set up Go
-      uses: actions/setup-go@v2
+      uses: actions/setup-go@v6
       with:
-        go-version: 1.17.x
+        go-version-file: 'go.mod'
 
     - name: Test
       run: go test -v -race -coverprofile=coverage.txt -covermode=atomic -shuffle=on ./...
 
     - name: Upload coverage
-      uses: codecov/codecov-action@v2
+      uses: codecov/codecov-action@v4
diff --git a/common/eci_encoder_set.go b/common/eci_encoder_set.go
@@ -0,0 +1,204 @@
+package common
+
+import (
+	"strings"
+
+	"golang.org/x/text/encoding"
+	"golang.org/x/text/encoding/charmap"
+	"golang.org/x/text/encoding/ianaindex"
+	"golang.org/x/text/encoding/japanese"
+	"golang.org/x/text/encoding/unicode"
+
+	"github.com/makiuchi-d/gozxing"
+)
+
+// ECIEncoderSet is the set of charset encoders selected for a given input string.
+//
+// Invariants:
+//   - The list contains only encoders from CharacterSetECI (the list is shorter than the list of encoders available
+//     on the platform for which ECI values are defined).
+//   - The list contains at least one encoder for every character in the input.
+//   - The first encoder in the list is always the ISO-8859-1 encoder, even if no character in the input can be
+//     encoded by it.
+//   - If the input contains a character that is not in ISO-8859-1 then the last two entries in the list will be the
+//     UTF-8 encoder and the UTF-16BE encoder.
+type ECIEncoderSet struct {
+	encoders             []encoding.Encoding // selected encoders; the index-taking methods address this slice
+	priorityEncoderIndex int                 // position of the priority charset in encoders, or -1
+}
+
+// encodersList is the ordered list of candidate encoders that NewECIEncoderSet probes when a character cannot
+// be encoded by the encoders already selected. The first entry that can encode the character is the one that
+// gets added, so the order of this list is significant.
+var encodersList = []encoding.Encoding{
+	charmap.CodePage437, // "IBM437"
+	charmap.ISO8859_2,
+	charmap.ISO8859_3,
+	charmap.ISO8859_4,
+	charmap.ISO8859_5,
+	charmap.ISO8859_6,
+	charmap.ISO8859_7,
+	charmap.ISO8859_8,
+	charmap.ISO8859_9,
+	charmap.ISO8859_10,
+	//charmap.ISO8859_11, // golang does not support
+	charmap.ISO8859_13,
+	charmap.ISO8859_14,
+	charmap.ISO8859_15,
+	charmap.ISO8859_16,
+	charmap.Windows1250,
+	charmap.Windows1251,
+	charmap.Windows1252,
+	charmap.Windows1256,
+	japanese.ShiftJIS,
+}
+
+// NewECIEncoderSet Constructs an encoder set
+//
+// @param stringToEncode the string that needs to be encoded
+// @param priorityCharset The preferred encoding.Encoding or nil.
+// @param fnc1 fnc1 denotes the character in the input that represents the FNC1 character or -1 for a non-GS1 bar
+// code. When specified, it is considered an error to pass it as argument to the methods canEncode() or encode().
+func NewECIEncoderSet(stringToEncode []rune, priorityCharset encoding.Encoding, fnc1 int) *ECIEncoderSet {
+	neededEncoders := make([]encoding.Encoding, 0)
+
+	// we always need the ISO-8859-1 encoder. It is the default encoding
+	neededEncoders = append(neededEncoders, charmap.ISO8859_1)
+
+	needUnicodeEncoder := false
+	if priorityCharset != nil {
+		if name, err := ianaindex.IANA.Name(priorityCharset); err == nil {
+			needUnicodeEncoder = strings.HasPrefix(name, "UTF")
+		}
+	}
+
+	// Walk over the input string and see if all characters can be encoded with the list of encoders
+	for i := 0; i < len(stringToEncode); i++ {
+		canEncode := false
+		c := stringToEncode[i]
+		if c == rune(fnc1) {
+			canEncode = true
+		} else {
+			for _, encoder := range neededEncoders {
+				if canEncodeChar(encoder, c) {
+					canEncode = true
+					break
+				}
+			}
+		}
+
+		if !canEncode {
+			// for the character at position i we don't yet have an encoder in the list
+			for _, encoder := range encodersList {
+				if canEncodeChar(encoder, c) {
+					// Good, we found an encoder that can encode the character. We add him to the list and continue scanning
+					// the input
+					neededEncoders = append(neededEncoders, encoder)
+					canEncode = true
+					break
+				}
+			}
+		}
+
+		if !canEncode {
+			// The character is not encodeable by any of the single byte encoders so we remember that we will need a
+			// Unicode encoder.
+			needUnicodeEncoder = true
+		}
+	}
+
+	var encoders []encoding.Encoding
+	if len(neededEncoders) == 1 && !needUnicodeEncoder {
+		// the entire input can be encoded by the ISO-8859-1 encoder
+		encoders = []encoding.Encoding{neededEncoders[0]}
+	} else {
+		// we need more than one single byte encoder or we need a Unicode encoder.
+		// In this case we append a UTF-8 and UTF-16 encoder to the list
+		encoders = make([]encoding.Encoding, len(neededEncoders)+2)
+		copy(encoders, neededEncoders)
+		encoders[len(neededEncoders)] = unicode.UTF8
+		utf16be, _ := ianaindex.IANA.Encoding("UTF-16BE")
+		encoders[len(neededEncoders)+1] = utf16be
+	}
+
+	// Compute priorityEncoderIndex by looking up priorityCharset in encoders
+	priorityEncoderIndexValue := -1
+	if priorityCharset != nil {
+		priorityName, _ := ianaindex.IANA.Name(priorityCharset)
+		for i := 0; i < len(encoders); i++ {
+			if encoders[i] != nil {
+				if name, err := ianaindex.IANA.Name(encoders[i]); err == nil {
+					if name == priorityName {
+						priorityEncoderIndexValue = i
+						break
+					}
+				}
+			}
+		}
+	}
+
+	return &ECIEncoderSet{
+		encoders:             encoders,
+		priorityEncoderIndex: priorityEncoderIndexValue,
+	}
+}
+
+// canEncodeChar reports whether enc can encode the single rune c. It feeds the UTF-8 form of c through a
+// fresh encoder and treats any returned error as "cannot encode".
+func canEncodeChar(enc encoding.Encoding, c rune) bool {
+	utf8Form := []byte(string(c))
+	if _, err := enc.NewEncoder().Bytes(utf8Form); err != nil {
+		return false
+	}
+	return true
+}
+
+// Length returns the number of encoders in the set.
+func (this *ECIEncoderSet) Length() int {
+	return len(this.encoders)
+}
+
+// GetCharsetName returns the IANA name of the encoder at index. It returns "" when index is out of range or
+// when the name lookup fails.
+func (this *ECIEncoderSet) GetCharsetName(index int) string {
+	// Negative indexes (for example the -1 returned by GetPriorityEncoderIndex) are rejected as well;
+	// without this they would panic on the slice access below.
+	if index < 0 || index >= len(this.encoders) {
+		return ""
+	}
+	name, err := ianaindex.IANA.Name(this.encoders[index])
+	if err != nil {
+		return ""
+	}
+	return name
+}
+
+// GetCharset returns the encoder at index, or nil when index is out of range.
+func (this *ECIEncoderSet) GetCharset(index int) encoding.Encoding {
+	// Negative indexes (for example the -1 returned by GetPriorityEncoderIndex) are rejected as well;
+	// without this they would panic on the slice access below.
+	if index < 0 || index >= len(this.encoders) {
+		return nil
+	}
+	return this.encoders[index]
+}
+
+// GetECIValue returns the ECI value for the encoder at encoderIndex. It returns -1 when encoderIndex is out
+// of range or when GetCharacterSetECI reports no match for that encoder.
+func (this *ECIEncoderSet) GetECIValue(encoderIndex int) int {
+	// Negative indexes (for example the -1 returned by GetPriorityEncoderIndex) are rejected as well;
+	// without this they would panic on the slice access below.
+	if encoderIndex < 0 || encoderIndex >= len(this.encoders) {
+		return -1
+	}
+	eci, ok := GetCharacterSetECI(this.encoders[encoderIndex])
+	if !ok {
+		return -1
+	}
+	return eci.GetValue()
+}
+
+// GetPriorityEncoderIndex returns the index of the priority charset within the set. It returns -1 if no
+// priority charset was defined or if the priority charset is not one of the encoders in the set.
+func (this *ECIEncoderSet) GetPriorityEncoderIndex() int {
+	return this.priorityEncoderIndex
+}
+
+// CanEncode reports whether the encoder at encoderIndex can encode c. It returns false when encoderIndex is
+// out of range.
+func (this *ECIEncoderSet) CanEncode(c rune, encoderIndex int) bool {
+	// Negative indexes (for example the -1 returned by GetPriorityEncoderIndex) are rejected as well;
+	// without this they would panic on the slice access below.
+	if encoderIndex < 0 || encoderIndex >= len(this.encoders) {
+		return false
+	}
+	return canEncodeChar(this.encoders[encoderIndex], c)
+}
+
+// EncodeChar encodes the single rune c with the encoder at encoderIndex and returns the resulting bytes.
+// It returns a WriterException when encoderIndex is out of range, and otherwise whatever error the encoder
+// reports for c.
+func (this *ECIEncoderSet) EncodeChar(c rune, encoderIndex int) ([]byte, error) {
+	// Negative indexes (for example the -1 returned by GetPriorityEncoderIndex) are reported as an error
+	// too; without this they would panic on the slice access below.
+	if encoderIndex < 0 || encoderIndex >= len(this.encoders) {
+		return nil, gozxing.NewWriterException("encoderIndex out of bounds")
+	}
+	encoder := this.encoders[encoderIndex].NewEncoder()
+	return encoder.Bytes([]byte(string(c)))
+}
+
+// EncodeString encodes s with the encoder at encoderIndex and returns the resulting bytes. It returns a
+// WriterException when encoderIndex is out of range, and otherwise whatever error the encoder reports for s.
+func (this *ECIEncoderSet) EncodeString(s string, encoderIndex int) ([]byte, error) {
+	// Negative indexes (for example the -1 returned by GetPriorityEncoderIndex) are reported as an error
+	// too; without this they would panic on the slice access below.
+	if encoderIndex < 0 || encoderIndex >= len(this.encoders) {
+		return nil, gozxing.NewWriterException("encoderIndex out of bounds")
+	}
+	encoder := this.encoders[encoderIndex].NewEncoder()
+	return encoder.Bytes([]byte(s))
+}
diff --git a/common/eci_encoder_set_test.go b/common/eci_encoder_set_test.go
@@ -0,0 +1,169 @@
+package common
+
+import (
+	"reflect"
+	"testing"
+
+	"golang.org/x/text/encoding"
+	"golang.org/x/text/encoding/charmap"
+	"golang.org/x/text/encoding/ianaindex"
+	"golang.org/x/text/encoding/japanese"
+	"golang.org/x/text/encoding/unicode"
+)
+
+func TestCanEncodeChar(t *testing.T) {
+	latin1, _ := ianaindex.IANA.Encoding("ISO-8859-1")
+	sjis, _ := ianaindex.IANA.Encoding("Shift_JIS")
+	utf8, _ := ianaindex.IANA.Encoding("UTF-8")
+
+	tests := map[string]struct {
+		enc  encoding.Encoding
+		char rune
+		exp  bool
+	}{
+		"ISO-8859-1 ok": {latin1, 'À', true},
+		"ISO-8859-1 ng": {latin1, 'あ', false},
+		"Shift_JIS ok":  {sjis, 'あ', true},
+		"Shift_JIS ng":  {sjis, 'À', false},
+		"UTF-8 ok":      {utf8, '😀', true},
+	}
+	for name, test := range tests {
+		t.Run(name, func(t *testing.T) {
+			r := canEncodeChar(test.enc, test.char)
+
+			if r != test.exp {
+				t.Errorf("canEncodeChar = %v wants %v", r, test.exp)
+			}
+		})
+	}
+}
+
+func TestNewECIEncoderSet(t *testing.T) {
+	latin1 := charmap.ISO8859_1
+	sjis := japanese.ShiftJIS
+	utf8 := unicode.UTF8
+	utf16be, _ := ianaindex.IANA.Encoding("UTF-16BE")
+
+	tests := map[string]struct {
+		str     []rune
+		charset encoding.Encoding
+		fnc1    int
+
+		encs    []encoding.Encoding
+		names   []string
+		ecis    []int
+		pencidx int
+	}{
+		"Latin1-only": {
+			[]rune("abc"), nil, -1,
+			[]encoding.Encoding{latin1},
+			[]string{"ISO_8859-1:1987"},
+			[]int{1},
+			-1,
+		},
+		"ShiftJIS": {
+			[]rune("あ"), sjis, -1,
+			[]encoding.Encoding{latin1, sjis, utf8, utf16be},
+			[]string{"ISO_8859-1:1987", "Shift_JIS", "UTF-8", "UTF-16BE"},
+			[]int{1, 20, 26, 25},
+			1,
+		},
+		"NeedUnicode": {
+			[]rune("😀"), nil, 1,
+			[]encoding.Encoding{latin1, utf8, utf16be},
+			[]string{"ISO_8859-1:1987", "UTF-8", "UTF-16BE"},
+			[]int{1, 26, 25},
+			-1,
+		},
+		"FNC1": {
+			[]rune("\u001dabc"), nil, 0x1d,
+			[]encoding.Encoding{latin1},
+			[]string{"ISO_8859-1:1987"},
+			[]int{1},
+			-1,
+		},
+	}
+	for name, test := range tests {
+		t.Run(name, func(t *testing.T) {
+			encset := NewECIEncoderSet(test.str, test.charset, test.fnc1)
+
+			l := encset.Length()
+			if l != len(test.encs) {
+				t.Fatalf("Length = %v wants %v", l, len(test.encs))
+			}
+			for i := range l {
+				if e := encset.GetCharset(i); e != test.encs[i] {
+					t.Errorf("GetCharset(%d) = %v wants %v", i, e, test.encs[i])
+				}
+				if n := encset.GetCharsetName(i); n != test.names[i] {
+					t.Errorf("GetCharsetName(%d) = %q wants %q", i, n, test.names[i])
+				}
+				if v := encset.GetECIValue(i); v != test.ecis[i] {
+					t.Errorf("GetECIValue(%d) = %v wants %v", i, v, test.ecis[i])
+				}
+			}
+			if p := encset.GetPriorityEncoderIndex(); p != test.pencidx {
+				t.Errorf("GetPriorityEncoderIndex = %v wants %v", p, test.pencidx)
+			}
+		})
+	}
+}
+
+func TestECIEncoderSet_MethodsFail(t *testing.T) {
+	es := NewECIEncoderSet([]rune("abc"), nil, -1)
+
+	if n := es.GetCharsetName(2); n != "" {
+		t.Errorf("GetCharsetName must be empty: %q", n)
+	}
+	if e := es.GetCharset(2); e != nil {
+		t.Errorf("Getcharset must be nil: %v", e)
+	}
+	if v := es.GetECIValue(2); v != -1 {
+		t.Errorf("GetECIValue must be -1: %v", v)
+	}
+}
+
+func TestECIEncoderSet_Encode(t *testing.T) {
+	es := NewECIEncoderSet([]rune("abc"), nil, -1)
+	str := "Àabc"
+	expchar := []byte{0xc0}
+	expstr := []byte{0xc0, 0x61, 0x62, 0x63}
+
+	char := []rune(str)[0]
+	if !es.CanEncode(char, 0) {
+		t.Errorf("CanEncode must be true")
+	}
+	if es.CanEncode(char, 1) {
+		t.Errorf("CanEncode must be false")
+	}
+
+	encchar, err := es.EncodeChar(char, 0)
+	if err != nil {
+		t.Errorf("EncodeChar error: %v", err)
+	} else if !reflect.DeepEqual(encchar, expchar) {
+		t.Errorf("EncodeChar(%c, 0) = %v wants %v", char, encchar, expchar)
+	}
+	_, err = es.EncodeChar(char, 1)
+	if err == nil {
+		t.Errorf("EncodeChar(%c, 1) must be error", char)
+	}
+	_, err = es.EncodeChar('あ', 0)
+	if err == nil {
+		t.Errorf("EncodeChar(あ, 0) must be error")
+	}
+
+	encstr, err := es.EncodeString(str, 0)
+	if err != nil {
+		t.Errorf("EncodeString(%q, 0) error: %v", str, err)
+	} else if !reflect.DeepEqual(encstr, expstr) {
+		t.Errorf("EncodeString(%q, 0) = %v wants %v", str, encstr, expstr)
+	}
+	_, err = es.EncodeString(str, 1)
+	if err == nil {
+		t.Errorf("EncodeString(%q, 1) must be error", str)
+	}
+	_, err = es.EncodeString("あ", 0)
+	if err == nil {
+		t.Errorf("EncodeString(\"あ\", 0) must be error")
+	}
+}