0 people like it.

Base32 Cyrillic

Russian alphabet has 33 letters and since Ё is not frequently used, it makes the perfect candidate for Base32 encoding.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
69: 
70: 
71: 
72: 
73: 
74: 
75: 
76: 
77: 
78: 
79: 
80: 
81: 
82: 
83: 
84: 
85: 
86: 
87: 
88: 
89: 
90: 
91: 
92: 
/// Encodes a UTF8 string to a Base32Cyr string
let encodeBase32Cyr text =
  let quintupletToList ending (x0, x1, x2, x3, x4) =
    let A = "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧЩЗЪЫЬЭЮЯ="

    let quintuplet = (int64 x0 <<< 32)
                 ||| (int64 x1 <<< 24)
                 ||| (int64 x2 <<< 16)
                 ||| (int64 x3 <<<  8)
                 ||| (int64 x4)
                 
    let a0 = (quintuplet &&& 0xF800000000L) >>> 35 |> int
    let a1 = (quintuplet &&& 0x07C0000000L) >>> 30 |> int
    let a2 = (quintuplet &&& 0x003E000000L) >>> 25 |> int
    let a3 = (quintuplet &&& 0x0001F00000L) >>> 20 |> int
    let a4 = (quintuplet &&& 0x00000F8000L) >>> 15 |> int
    let a5 = (quintuplet &&& 0x0000007C00L) >>> 10 |> int
    let a6 = (quintuplet &&& 0x00000003E0L) >>>  5 |> int
    let a7 = (quintuplet &&& 0x000000001FL)        |> int
    
    match ending with
    | 1 -> [A.[a0]; A.[a1];   '=' ;   '=' ;   '=' ;   '=' ;   '=' ;   '=' ;] // 01======
    | 2 -> [A.[a0]; A.[a1]; A.[a2]; A.[a3];   '=' ;   '=' ;   '=' ;   '=' ;] // 0123====
    | 3 -> [A.[a0]; A.[a1]; A.[a2]; A.[a3]; A.[a4];   '=' ;   '=' ;   '=' ;] // 01234===
    | 4 -> [A.[a0]; A.[a1]; A.[a2]; A.[a3]; A.[a4]; A.[a5]; A.[a6];   '=' ;] // 0123456=
    | _ -> [A.[a0]; A.[a1]; A.[a2]; A.[a3]; A.[a4]; A.[a5]; A.[a6]; A.[a7];] // 01234567

  let rec parse result input =
    match input with
    | x0 :: x1 :: x2 :: x3 :: x4 :: tail -> parse (result @ quintupletToList 5 (x0, x1, x2, x3, x4)) tail
    | x0 :: x1 :: x2 :: x3 :: []         -> result @ quintupletToList 4 (x0,  x1,  x2,  x3, 0uy)
    | x0 :: x1 :: x2 :: []               -> result @ quintupletToList 3 (x0,  x1,  x2, 0uy, 0uy)
    | x0 :: x1 :: []                     -> result @ quintupletToList 2 (x0,  x1, 0uy, 0uy, 0uy)
    | x0 :: []                           -> result @ quintupletToList 1 (x0, 0uy, 0uy, 0uy, 0uy)
    | []                                 -> result

  (text:string)
  |> System.Text.Encoding.UTF8.GetBytes
  |> Array.toList
  |> parse []
  |> List.toArray
  |> System.String.Concat



/// Decodes a Base32Cyr string to a UTF8 string
let decodeBase32Cyr text = 
  if (text:string).Length % 8 <> 0 then "" else

    let A = [for c in "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧЩЗЪЫЬЭЮЯ=" -> c]
            |> List.mapi (fun i a -> a, i)
            |> Map.ofList
  
    let octetToList ending (a0, a1, a2, a3, a4, a5, a6, a7) =
      let inline (.@) (m: Map<char, int>) key = try m.[System.Char.ToUpper key] |> int64 with _ -> 0L in
      
      let octet = (A.@ a0 &&& 0x1FL <<< 35) ||| (A.@ a1 &&& 0x1FL <<< 30)
              ||| (A.@ a2 &&& 0x1FL <<< 25) ||| (A.@ a3 &&& 0x1FL <<< 20)
              ||| (A.@ a4 &&& 0x1FL <<< 15) ||| (A.@ a5 &&& 0x1FL <<< 10)
              ||| (A.@ a6 &&& 0x1FL <<<  5) ||| (A.@ a7 &&& 0x1FL)
              
      let x0,x1,x2,x3,x4 = (octet &&& 0xFF00000000L) >>> 32 |> byte,
                           (octet &&& 0x00FF000000L) >>> 24 |> byte,
                           (octet &&& 0x0000FF0000L) >>> 16 |> byte,
                           (octet &&& 0x000000FF00L) >>>  8 |> byte,
                           (octet &&& 0x00000000FFL)        |> byte
      match ending with
      | 2 -> [x0;]
      | 4 -> [x0; x1;]
      | 5 -> [x0; x1; x2;]
      | 7 -> [x0; x1; x2; x3;]
      | _ -> [x0; x1; x2; x3; x4;]
      
    let rec parse result input =
      match input with
      | a0::a1::'='::_                       -> result @ octetToList 2 (a0, a1,'=','=','=','=','=','=')
      | a0::a1::a2::a3::'='::_               -> result @ octetToList 4 (a0, a1, a2, a3,'=','=','=','=')
      | a0::a1::a2::a3::a4::'='::_           -> result @ octetToList 5 (a0, a1, a2, a3, a4,'=','=','=')
      | a0::a1::a2::a3::a4::a5::a6::'='::_   -> result @ octetToList 7 (a0, a1, a2, a3, a4, a5, a6,'=')
      | a0::a1::a2::a3::a4::a5::a6::a7::tail -> parse (result @ octetToList 8 (a0, a1, a2, a3, a4, a5, a6, a7)) tail
      | _                                    -> result

    [for c in text -> c]
    |> parse []
    |> List.toArray
    |> System.Text.Encoding.UTF8.GetString

    
// Usage
encodeBase32Cyr "Putin"     // "КБЪЧИЪЛО"
decodeBase32Cyr "ЙНГФД==="  // "KGB"
    
val encodeBase32Cyr : text:string -> string

Full name: Script.encodeBase32Cyr


 Encodes a UTF8 string to a Base32Cyr string
val text : string
val quintupletToList : (int -> byte * byte * byte * byte * byte -> char list)
val ending : int
val x0 : byte
val x1 : byte
val x2 : byte
val x3 : byte
val x4 : byte
val A : string
val quintuplet : int64
Multiple items
val int64 : value:'T -> int64 (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int64

--------------------
type int64 = System.Int64

Full name: Microsoft.FSharp.Core.int64

--------------------
type int64<'Measure> = int64

Full name: Microsoft.FSharp.Core.int64<_>
val a0 : int
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
val a1 : int
val a2 : int
val a3 : int
val a4 : int
val a5 : int
val a6 : int
val a7 : int
val parse : (char list -> byte list -> char list)
val result : char list
val input : byte list
val tail : byte list
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = System.String

Full name: Microsoft.FSharp.Core.string
namespace System
namespace System.Text
type Encoding =
  member BodyName : string
  member Clone : unit -> obj
  member CodePage : int
  member DecoderFallback : DecoderFallback with get, set
  member EncoderFallback : EncoderFallback with get, set
  member EncodingName : string
  member Equals : value:obj -> bool
  member GetByteCount : chars:char[] -> int + 3 overloads
  member GetBytes : chars:char[] -> byte[] + 5 overloads
  member GetCharCount : bytes:byte[] -> int + 2 overloads
  ...

Full name: System.Text.Encoding
property System.Text.Encoding.UTF8: System.Text.Encoding
System.Text.Encoding.GetBytes(s: string) : byte []
System.Text.Encoding.GetBytes(chars: char []) : byte []
System.Text.Encoding.GetBytes(chars: char [], index: int, count: int) : byte []
System.Text.Encoding.GetBytes(chars: nativeptr<char>, charCount: int, bytes: nativeptr<byte>, byteCount: int) : int
System.Text.Encoding.GetBytes(s: string, charIndex: int, charCount: int, bytes: byte [], byteIndex: int) : int
System.Text.Encoding.GetBytes(chars: char [], charIndex: int, charCount: int, bytes: byte [], byteIndex: int) : int
module Array

from Microsoft.FSharp.Collections
val toList : array:'T [] -> 'T list

Full name: Microsoft.FSharp.Collections.Array.toList
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member GetSlice : startIndex:int option * endIndex:int option -> 'T list
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val toArray : list:'T list -> 'T []

Full name: Microsoft.FSharp.Collections.List.toArray
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

Full name: System.String

--------------------
System.String(value: nativeptr<char>) : unit
System.String(value: nativeptr<sbyte>) : unit
System.String(value: char []) : unit
System.String(c: char, count: int) : unit
System.String(value: nativeptr<char>, startIndex: int, length: int) : unit
System.String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
System.String(value: char [], startIndex: int, length: int) : unit
System.String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: System.Text.Encoding) : unit
System.String.Concat([<System.ParamArray>] values: string []) : string
   (+0 other overloads)
System.String.Concat(values: System.Collections.Generic.IEnumerable<string>) : string
   (+0 other overloads)
System.String.Concat<'T>(values: System.Collections.Generic.IEnumerable<'T>) : string
   (+0 other overloads)
System.String.Concat([<System.ParamArray>] args: obj []) : string
   (+0 other overloads)
System.String.Concat(arg0: obj) : string
   (+0 other overloads)
System.String.Concat(str0: string, str1: string) : string
   (+0 other overloads)
System.String.Concat(arg0: obj, arg1: obj) : string
   (+0 other overloads)
System.String.Concat(str0: string, str1: string, str2: string) : string
   (+0 other overloads)
System.String.Concat(arg0: obj, arg1: obj, arg2: obj) : string
   (+0 other overloads)
System.String.Concat(str0: string, str1: string, str2: string, str3: string) : string
   (+0 other overloads)
val decodeBase32Cyr : text:string -> string

Full name: Script.decodeBase32Cyr


 Decodes a Base32Cyr string to a UTF8 string
val A : Map<char,int>
val c : char
val mapi : mapping:(int -> 'T -> 'U) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.mapi
val i : int
val a : char
Multiple items
module Map

from Microsoft.FSharp.Collections

--------------------
type Map<'Key,'Value (requires comparison)> =
  interface IEnumerable
  interface IComparable
  interface IEnumerable<KeyValuePair<'Key,'Value>>
  interface ICollection<KeyValuePair<'Key,'Value>>
  interface IDictionary<'Key,'Value>
  new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
  member Add : key:'Key * value:'Value -> Map<'Key,'Value>
  member ContainsKey : key:'Key -> bool
  override Equals : obj -> bool
  member Remove : key:'Key -> Map<'Key,'Value>
  ...

Full name: Microsoft.FSharp.Collections.Map<_,_>

--------------------
new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
val ofList : elements:('Key * 'T) list -> Map<'Key,'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Map.ofList
val octetToList : (int -> char * char * char * char * char * char * char * char -> byte list)
val a0 : char
val a1 : char
val a2 : char
val a3 : char
val a4 : char
val a5 : char
val a6 : char
val a7 : char
val m : Map<char,int>
Multiple items
val char : value:'T -> char (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.char

--------------------
type char = System.Char

Full name: Microsoft.FSharp.Core.char
val key : char
type Char =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 1 overload
    static val MaxValue : char
    static val MinValue : char
    static member ConvertFromUtf32 : utf32:int -> string
    static member ConvertToUtf32 : highSurrogate:char * lowSurrogate:char -> int + 1 overload
    static member GetNumericValue : c:char -> float + 1 overload
    ...
  end

Full name: System.Char
System.Char.ToUpper(c: char) : char
System.Char.ToUpper(c: char, culture: System.Globalization.CultureInfo) : char
val octet : int64
Multiple items
val byte : value:'T -> byte (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.byte

--------------------
type byte = System.Byte

Full name: Microsoft.FSharp.Core.byte
val parse : (byte list -> char list -> byte list)
val result : byte list
val input : char list
val tail : char list
System.Text.Encoding.GetString(bytes: byte []) : string
System.Text.Encoding.GetString(bytes: byte [], index: int, count: int) : string
Next Version Raw view Test code New version

More information

Link:http://fssnip.net/7PU
Posted:4 years ago
Author:Dmitry Achkasov
Tags: base32 decoding , base32 encoding , base32