Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v6

- name: Set up Go
uses: actions/setup-go@v2
uses: actions/setup-go@v6
with:
go-version: 1.17.x
go-version-file: 'go.mod'

- name: Test
run: go test -v -race -coverprofile=coverage.txt -covermode=atomic -shuffle=on ./...

- name: Upload coverage
uses: codecov/codecov-action@v2
uses: codecov/codecov-action@v4
204 changes: 204 additions & 0 deletions common/eci_encoder_set.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
package common

import (
"strings"

"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/ianaindex"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/unicode"

"github.com/makiuchi-d/gozxing"
)

// ECIEncoderSet is the set of character encodings needed to encode a given input string.
//
// Invariants:
//   - The list contains only encoders from CharacterSetECI (the list is shorter than the list of encoders available
//     on the platform for which ECI values are defined).
//   - The list contains at least one encoder for every character in the input.
//   - The first encoder in the list is always the ISO-8859-1 encoder, even if no character in the input can be
//     encoded by it.
//   - If the input contains a character that is not in ISO-8859-1 then the last two entries in the list will be the
//     UTF-8 encoder and the UTF-16BE encoder.
type ECIEncoderSet struct {
// encoders holds the selected encodings, in the order described by the invariants above.
encoders []encoding.Encoding
// priorityEncoderIndex is the index in encoders of the preferred charset passed to NewECIEncoderSet,
// or -1 when none was given or it is not part of encoders.
priorityEncoderIndex int
}

// encodersList is the ordered list of candidate encodings that NewECIEncoderSet consults when a character cannot be
// encoded by the encoders it has selected so far. Order matters: the first entry able to encode the character is
// the one that gets added to the set.
var encodersList = []encoding.Encoding{
charmap.CodePage437, // "IBM437"
charmap.ISO8859_2,
charmap.ISO8859_3,
charmap.ISO8859_4,
charmap.ISO8859_5,
charmap.ISO8859_6,
charmap.ISO8859_7,
charmap.ISO8859_8,
charmap.ISO8859_9,
charmap.ISO8859_10,
// charmap.ISO8859_11 is intentionally left out: golang does not support it.
charmap.ISO8859_13,
charmap.ISO8859_14,
charmap.ISO8859_15,
charmap.ISO8859_16,
charmap.Windows1250,
charmap.Windows1251,
charmap.Windows1252,
charmap.Windows1256,
japanese.ShiftJIS,
}

// NewECIEncoderSet builds the set of encodings required to encode stringToEncode.
//
// Parameters:
//   - stringToEncode: the characters that need to be encoded.
//   - priorityCharset: the preferred encoding.Encoding, or nil when there is no preference. When its IANA name
//     starts with "UTF", the Unicode encoders are always included in the set.
//   - fnc1: the character in the input that represents the FNC1 character, or -1 for a non-GS1 bar code. Occurrences
//     of this character are skipped while selecting encoders; when specified, it is considered an error to pass it
//     to CanEncode or EncodeChar.
//
// The returned set always starts with ISO-8859-1. If more than one single-byte encoder is needed, or a Unicode
// encoder is needed, UTF-8 and UTF-16BE are appended as the last two entries.
func NewECIEncoderSet(stringToEncode []rune, priorityCharset encoding.Encoding, fnc1 int) *ECIEncoderSet {
	// ISO-8859-1 is the default encoding and is therefore always present, in first position.
	selected := []encoding.Encoding{charmap.ISO8859_1}

	// A UTF priority charset forces the Unicode encoders into the set.
	wantUnicode := false
	if priorityCharset != nil {
		if name, err := ianaindex.IANA.Name(priorityCharset); err == nil {
			wantUnicode = strings.HasPrefix(name, "UTF")
		}
	}

	fnc1Rune := rune(fnc1)

scan:
	for _, ch := range stringToEncode {
		// The FNC1 placeholder never needs an encoder of its own.
		if ch == fnc1Rune {
			continue
		}

		// Already covered by one of the encoders picked so far?
		for _, enc := range selected {
			if canEncodeChar(enc, ch) {
				continue scan
			}
		}

		// Not covered yet: take the first candidate that can encode it and keep scanning the input.
		for _, candidate := range encodersList {
			if canEncodeChar(candidate, ch) {
				selected = append(selected, candidate)
				continue scan
			}
		}

		// None of the candidate encoders can represent this character, so a Unicode encoder is required.
		wantUnicode = true
	}

	var encoders []encoding.Encoding
	if len(selected) == 1 && !wantUnicode {
		// The entire input can be encoded by the ISO-8859-1 encoder alone.
		encoders = []encoding.Encoding{selected[0]}
	} else {
		// More than one single-byte encoder, or a Unicode encoder, is needed:
		// append UTF-8 and UTF-16BE after the selected encoders.
		utf16be, _ := ianaindex.IANA.Encoding("UTF-16BE")
		encoders = make([]encoding.Encoding, 0, len(selected)+2)
		encoders = append(encoders, selected...)
		encoders = append(encoders, unicode.UTF8, utf16be)
	}

	// Locate the priority charset in the final list by comparing IANA names.
	priorityIndex := -1
	if priorityCharset != nil {
		wantName, _ := ianaindex.IANA.Name(priorityCharset)
		for idx, enc := range encoders {
			if enc == nil {
				continue
			}
			name, err := ianaindex.IANA.Name(enc)
			if err == nil && name == wantName {
				priorityIndex = idx
				break
			}
		}
	}

	return &ECIEncoderSet{
		encoders:             encoders,
		priorityEncoderIndex: priorityIndex,
	}
}

// canEncodeChar reports whether enc can encode the single character c. It answers by running c through a freshly
// created encoder and checking whether the conversion fails.
func canEncodeChar(enc encoding.Encoding, c rune) bool {
	if _, err := enc.NewEncoder().Bytes([]byte(string(c))); err != nil {
		return false
	}
	return true
}

// Length returns the number of encoders in the set.
func (this *ECIEncoderSet) Length() int {
return len(this.encoders)
}

// GetCharsetName returns the IANA name of the encoder at index.
//
// It returns "" when index is outside [0, Length()), or when the IANA index cannot name the encoder.
func (this *ECIEncoderSet) GetCharsetName(index int) string {
	// Negative indexes are rejected as well: GetPriorityEncoderIndex uses -1 as its "no priority charset" value,
	// and indexing the slice with it would panic.
	if index < 0 || index >= len(this.encoders) {
		return ""
	}
	// The lookup error is deliberately dropped: an encoder without an IANA name is reported as "".
	name, _ := ianaindex.IANA.Name(this.encoders[index])
	return name
}

// GetCharset returns the encoding at index, or nil when index is outside [0, Length()).
func (this *ECIEncoderSet) GetCharset(index int) encoding.Encoding {
	// Negative indexes are rejected as well: GetPriorityEncoderIndex uses -1 as its "no priority charset" value,
	// and indexing the slice with it would panic.
	if index < 0 || index >= len(this.encoders) {
		return nil
	}
	return this.encoders[index]
}

// GetECIValue returns the ECI value of the encoder at encoderIndex.
//
// It returns -1 when encoderIndex is outside [0, Length()), or when GetCharacterSetECI has no entry for the encoder.
func (this *ECIEncoderSet) GetECIValue(encoderIndex int) int {
	// Negative indexes are rejected as well: GetPriorityEncoderIndex uses -1 as its "no priority charset" value,
	// and indexing the slice with it would panic.
	if encoderIndex < 0 || encoderIndex >= len(this.encoders) {
		return -1
	}
	if eci, ok := GetCharacterSetECI(this.encoders[encoderIndex]); ok {
		return eci.GetValue()
	}
	return -1
}

// GetPriorityEncoderIndex returns the index of the priority charset within the set, as computed by
// NewECIEncoderSet. It returns -1 if no priority charset was defined.
func (this *ECIEncoderSet) GetPriorityEncoderIndex() int {
return this.priorityEncoderIndex
}

// CanEncode reports whether the encoder at encoderIndex can encode the character c.
//
// It returns false when encoderIndex is outside [0, Length()).
func (this *ECIEncoderSet) CanEncode(c rune, encoderIndex int) bool {
	// Negative indexes are rejected as well: GetPriorityEncoderIndex uses -1 as its "no priority charset" value,
	// and indexing the slice with it would panic.
	if encoderIndex < 0 || encoderIndex >= len(this.encoders) {
		return false
	}
	return canEncodeChar(this.encoders[encoderIndex], c)
}

// EncodeChar encodes the single character c with the encoder at encoderIndex.
//
// It returns a writer exception when encoderIndex is outside [0, Length()), and the encoder's own error when the
// character cannot be represented in that encoding.
func (this *ECIEncoderSet) EncodeChar(c rune, encoderIndex int) ([]byte, error) {
	// Negative indexes are reported as out of bounds instead of panicking on the slice access.
	if encoderIndex < 0 || encoderIndex >= len(this.encoders) {
		return nil, gozxing.NewWriterException("encoderIndex out of bounds")
	}
	encoder := this.encoders[encoderIndex].NewEncoder()
	return encoder.Bytes([]byte(string(c)))
}

// EncodeString encodes the whole string s with the encoder at encoderIndex.
//
// It returns a writer exception when encoderIndex is outside [0, Length()), and the encoder's own error when s
// contains a character that cannot be represented in that encoding.
func (this *ECIEncoderSet) EncodeString(s string, encoderIndex int) ([]byte, error) {
	// Negative indexes are reported as out of bounds instead of panicking on the slice access.
	if encoderIndex < 0 || encoderIndex >= len(this.encoders) {
		return nil, gozxing.NewWriterException("encoderIndex out of bounds")
	}
	encoder := this.encoders[encoderIndex].NewEncoder()
	return encoder.Bytes([]byte(s))
}
169 changes: 169 additions & 0 deletions common/eci_encoder_set_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
package common

import (
"reflect"
"testing"

"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/ianaindex"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/unicode"
)

func TestCanEncodeChar(t *testing.T) {
latin1, _ := ianaindex.IANA.Encoding("ISO-8859-1")
sjis, _ := ianaindex.IANA.Encoding("Shift_JIS")
utf8, _ := ianaindex.IANA.Encoding("UTF-8")

tests := map[string]struct {
enc encoding.Encoding
char rune
exp bool
}{
"ISO-8859-1 ok": {latin1, 'À', true},
"ISO-8859-1 ng": {latin1, 'あ', false},
"Shift_JIS ok": {sjis, 'あ', true},
"Shift_JIS ng": {sjis, 'À', false},
"UTF-8 ok": {utf8, '😀', true},
}
for name, test := range tests {
t.Run(name, func(t *testing.T) {
r := canEncodeChar(test.enc, test.char)

if r != test.exp {
t.Errorf("canEncodeChar = %v wants %v", r, test.exp)
}
})
}
}

func TestNewECIEncoderSet(t *testing.T) {
latin1 := charmap.ISO8859_1
sjis := japanese.ShiftJIS
utf8 := unicode.UTF8
utf16be, _ := ianaindex.IANA.Encoding("UTF-16BE")

tests := map[string]struct {
str []rune
charset encoding.Encoding
fnc1 int

encs []encoding.Encoding
names []string
ecis []int
pencidx int
}{
"Latin1-only": {
[]rune("abc"), nil, -1,
[]encoding.Encoding{latin1},
[]string{"ISO_8859-1:1987"},
[]int{1},
-1,
},
"ShiftJIS": {
[]rune("あ"), sjis, -1,
[]encoding.Encoding{latin1, sjis, utf8, utf16be},
[]string{"ISO_8859-1:1987", "Shift_JIS", "UTF-8", "UTF-16BE"},
[]int{1, 20, 26, 25},
1,
},
"NeedUnicode": {
[]rune("😀"), nil, 1,
[]encoding.Encoding{latin1, utf8, utf16be},
[]string{"ISO_8859-1:1987", "UTF-8", "UTF-16BE"},
[]int{1, 26, 25},
-1,
},
"FNC1": {
[]rune("\u001dabc"), nil, 0x1d,
[]encoding.Encoding{latin1},
[]string{"ISO_8859-1:1987"},
[]int{1},
-1,
},
}
for name, test := range tests {
t.Run(name, func(t *testing.T) {
encset := NewECIEncoderSet(test.str, test.charset, test.fnc1)

l := encset.Length()
if l != len(test.encs) {
t.Fatalf("Length = %v wants %v", l, len(test.encs))
}
for i := range l {
if e := encset.GetCharset(i); e != test.encs[i] {
t.Errorf("GetCharset(%d) = %v wants %v", i, e, test.encs[i])
}
if n := encset.GetCharsetName(i); n != test.names[i] {
t.Errorf("GetCharsetName(%d) = %q wants %q", i, n, test.names[i])
}
if v := encset.GetECIValue(i); v != test.ecis[i] {
t.Errorf("GetECIValue(%d) = %v wants %v", i, v, test.ecis[i])
}
}
if p := encset.GetPriorityEncoderIndex(); p != test.pencidx {
t.Errorf("GetPriorityEncoderIndex = %v wants %v", p, test.pencidx)
}
})
}
}

// TestECIEncoderSet_MethodsFail checks the values the accessor methods return for an index past the end of a
// single-encoder set.
func TestECIEncoderSet_MethodsFail(t *testing.T) {
	// "abc" needs only ISO-8859-1, so index 2 is out of range.
	const outOfRange = 2
	set := NewECIEncoderSet([]rune("abc"), nil, -1)

	if name := set.GetCharsetName(outOfRange); name != "" {
		t.Errorf("GetCharsetName must be empty: %q", name)
	}
	if enc := set.GetCharset(outOfRange); enc != nil {
		t.Errorf("Getcharset must be nil: %v", enc)
	}
	if eci := set.GetECIValue(outOfRange); eci != -1 {
		t.Errorf("GetECIValue must be -1: %v", eci)
	}
}

// TestECIEncoderSet_Encode exercises CanEncode, EncodeChar and EncodeString on a single-encoder (ISO-8859-1) set,
// covering the success path, an out-of-range encoder index and a character the encoder cannot represent.
func TestECIEncoderSet_Encode(t *testing.T) {
	const input = "Àabc"
	set := NewECIEncoderSet([]rune("abc"), nil, -1)

	first := []rune(input)[0]
	wantChar := []byte{0xc0}
	wantStr := []byte{0xc0, 0x61, 0x62, 0x63}

	// CanEncode: index 0 is ISO-8859-1, index 1 does not exist.
	if !set.CanEncode(first, 0) {
		t.Errorf("CanEncode must be true")
	}
	if set.CanEncode(first, 1) {
		t.Errorf("CanEncode must be false")
	}

	// EncodeChar: success, bad index, unencodable character.
	switch got, err := set.EncodeChar(first, 0); {
	case err != nil:
		t.Errorf("EncodeChar error: %v", err)
	case !reflect.DeepEqual(got, wantChar):
		t.Errorf("EncodeChar(%c, 0) = %v wants %v", first, got, wantChar)
	}
	if _, err := set.EncodeChar(first, 1); err == nil {
		t.Errorf("EncodeChar(%c, 1) must be error", first)
	}
	if _, err := set.EncodeChar('あ', 0); err == nil {
		t.Errorf("EncodeChar(あ, 0) must be error")
	}

	// EncodeString: success, bad index, unencodable character.
	switch got, err := set.EncodeString(input, 0); {
	case err != nil:
		t.Errorf("EncodeString(%q, 0) error: %v", input, err)
	case !reflect.DeepEqual(got, wantStr):
		t.Errorf("EncodeString(%q, 0) = %v wants %v", input, got, wantStr)
	}
	if _, err := set.EncodeString(input, 1); err == nil {
		t.Errorf("EncodeString(%q, 1) must be error", input)
	}
	if _, err := set.EncodeString("あ", 0); err == nil {
		t.Errorf("EncodeString(\"あ\", 0) must be error")
	}
}
Loading