/*
/ Character classification routines (charclass.zir)
/ Copyright (c) 0CodErr, KolibriOS team
*/
// Character-class bit flags. Each CHAR_TABLE entry is an OR of these bits.
const CHAR_DIGIT = 1 << 0; // bit 0: set for '0'..'9' in CHAR_TABLE
const CHAR_UPPER = 1 << 1; // bit 1: set for 'A'..'Z' in CHAR_TABLE
const CHAR_LOWER = 1 << 2; // bit 2: set for 'a'..'z' in CHAR_TABLE
const CHAR_CONTROL = 1 << 3; // bit 3: set for 0x00..0x1F and 0x7F in CHAR_TABLE
const CHAR_WHITESPACE = 1 << 4; // bit 4: set for 0x09..0x0D and 0x20 in CHAR_TABLE
const CHAR_PUNCT = 1 << 5; // bit 5: set for the remaining printable ASCII symbols
// Combined masks.
const CHAR_LETTER = CHAR_LOWER | CHAR_UPPER; // either letter case
const CHAR_CONTROL_WHITESPACE = CHAR_CONTROL | CHAR_WHITESPACE; // HT, LF, VT, FF, CR
// Lookup table mapping a byte value to its CHAR_* class bits.
// The table is indexed directly by the character code (see IsXxx / GetCharType).
// NOTE(review): the array is declared with 256 entries but only codes
// 0x00..0x7F are listed here; presumably the remaining 128 entries are
// zero-filled (no class) -- confirm against the assembler's behavior.
char CHAR_TABLE[256] = [
// 0..15
CHAR_CONTROL, // 00 (NUL)
CHAR_CONTROL, // 01 (SOH)
CHAR_CONTROL, // 02 (STX)
CHAR_CONTROL, // 03 (ETX)
CHAR_CONTROL, // 04 (EOT)
CHAR_CONTROL, // 05 (ENQ)
CHAR_CONTROL, // 06 (ACK)
CHAR_CONTROL, // 07 (BEL)
CHAR_CONTROL, // 08 (BS)
CHAR_CONTROL_WHITESPACE, // 09 (HT)
CHAR_CONTROL_WHITESPACE, // 0A (LF)
CHAR_CONTROL_WHITESPACE, // 0B (VT)
CHAR_CONTROL_WHITESPACE, // 0C (FF)
CHAR_CONTROL_WHITESPACE, // 0D (CR)
CHAR_CONTROL, // 0E (SO)
CHAR_CONTROL, // 0F (SI)
// 16..31
CHAR_CONTROL, // 10 (DLE)
CHAR_CONTROL, // 11 (DC1)
CHAR_CONTROL, // 12 (DC2)
CHAR_CONTROL, // 13 (DC3)
CHAR_CONTROL, // 14 (DC4)
CHAR_CONTROL, // 15 (NAK)
CHAR_CONTROL, // 16 (SYN)
CHAR_CONTROL, // 17 (ETB)
CHAR_CONTROL, // 18 (CAN)
CHAR_CONTROL, // 19 (EM)
CHAR_CONTROL, // 1A (SUB)
CHAR_CONTROL, // 1B (ESC)
CHAR_CONTROL, // 1C (FS)
CHAR_CONTROL, // 1D (GS)
CHAR_CONTROL, // 1E (RS)
CHAR_CONTROL, // 1F (US)
// 32..47
CHAR_WHITESPACE, // 20 SPACE
CHAR_PUNCT, // 21 !
CHAR_PUNCT, // 22 "
CHAR_PUNCT, // 23 #
CHAR_PUNCT, // 24 $
CHAR_PUNCT, // 25 %
CHAR_PUNCT, // 26 &
CHAR_PUNCT, // 27 '
CHAR_PUNCT, // 28 (
CHAR_PUNCT, // 29 )
CHAR_PUNCT, // 2A *
CHAR_PUNCT, // 2B +
CHAR_PUNCT, // 2C ,
CHAR_PUNCT, // 2D -
CHAR_PUNCT, // 2E .
CHAR_PUNCT, // 2F /
// 48..63
CHAR_DIGIT, // 30 0
CHAR_DIGIT, // 31 1
CHAR_DIGIT, // 32 2
CHAR_DIGIT, // 33 3
CHAR_DIGIT, // 34 4
CHAR_DIGIT, // 35 5
CHAR_DIGIT, // 36 6
CHAR_DIGIT, // 37 7
CHAR_DIGIT, // 38 8
CHAR_DIGIT, // 39 9
CHAR_PUNCT, // 3A :
CHAR_PUNCT, // 3B ;
CHAR_PUNCT, // 3C <
CHAR_PUNCT, // 3D =
CHAR_PUNCT, // 3E >
CHAR_PUNCT, // 3F ?
// 64..79
CHAR_PUNCT, // 40 @
CHAR_UPPER, // 41 A
CHAR_UPPER, // 42 B
CHAR_UPPER, // 43 C
CHAR_UPPER, // 44 D
CHAR_UPPER, // 45 E
CHAR_UPPER, // 46 F
CHAR_UPPER, // 47 G
CHAR_UPPER, // 48 H
CHAR_UPPER, // 49 I
CHAR_UPPER, // 4A J
CHAR_UPPER, // 4B K
CHAR_UPPER, // 4C L
CHAR_UPPER, // 4D M
CHAR_UPPER, // 4E N
CHAR_UPPER, // 4F O
// 80..95
CHAR_UPPER, // 50 P
CHAR_UPPER, // 51 Q
CHAR_UPPER, // 52 R
CHAR_UPPER, // 53 S
CHAR_UPPER, // 54 T
CHAR_UPPER, // 55 U
CHAR_UPPER, // 56 V
CHAR_UPPER, // 57 W
CHAR_UPPER, // 58 X
CHAR_UPPER, // 59 Y
CHAR_UPPER, // 5A Z
CHAR_PUNCT, // 5B [
CHAR_PUNCT, // 5C \
CHAR_PUNCT, // 5D ]
CHAR_PUNCT, // 5E ^
CHAR_PUNCT, // 5F _
// 96..111
CHAR_PUNCT, // 60 `
CHAR_LOWER, // 61 a
CHAR_LOWER, // 62 b
CHAR_LOWER, // 63 c
CHAR_LOWER, // 64 d
CHAR_LOWER, // 65 e
CHAR_LOWER, // 66 f
CHAR_LOWER, // 67 g
CHAR_LOWER, // 68 h
CHAR_LOWER, // 69 i
CHAR_LOWER, // 6A j
CHAR_LOWER, // 6B k
CHAR_LOWER, // 6C l
CHAR_LOWER, // 6D m
CHAR_LOWER, // 6E n
CHAR_LOWER, // 6F o
// 112..127
CHAR_LOWER, // 70 p
CHAR_LOWER, // 71 q
CHAR_LOWER, // 72 r
CHAR_LOWER, // 73 s
CHAR_LOWER, // 74 t
CHAR_LOWER, // 75 u
CHAR_LOWER, // 76 v
CHAR_LOWER, // 77 w
CHAR_LOWER, // 78 x
CHAR_LOWER, // 79 y
CHAR_LOWER, // 7A z
CHAR_PUNCT, // 7B {
CHAR_PUNCT, // 7C |
CHAR_PUNCT, // 7D }
CHAR_PUNCT, // 7E ~
CHAR_CONTROL // 7F (DEL)
];
// IsDigit($symbol): check whether the CHAR_TABLE entry for $symbol has the
// CHAR_DIGIT bit set.
//   $symbol - byte value to classify; it is loaded into al.
// Overwrites gp_ax: it is zeroed first so that, with al holding the
// character, gp_ax is a clean table index.
// The returned token depends on tokentype($_operand):
//   TOKEN_FLAG - only a `test` of the table byte is emitted and ZF is returned.
//   otherwise  - gp_ax = table byte & CHAR_DIGIT (nonzero when the bit is set).
inline function IsDigit($symbol) {
// Class mask; zero-extend the character into gp_ax via al.
$C = CHAR_DIGIT; gp_ax = 0; al = $symbol;
// Presumably an assemble-time choice based on how the call is used -- TODO confirm.
$if tokentype($_operand) == TOKEN_FLAG:
test CHAR_TABLE[gp_ax], $C;
// NOTE(review): x86 `test` sets ZF when the AND is zero, i.e. when the
// class bit is NOT set -- confirm callers expect this polarity.
$res = ZF;
$else
// Load the class byte over the character and keep only the mask bits.
al = CHAR_TABLE[gp_ax]; al &= $C;
$res = gp_ax;
$end;
$return $res;
}
// IsLetter($symbol): check whether the CHAR_TABLE entry for $symbol has any
// CHAR_LETTER bit (CHAR_LOWER or CHAR_UPPER) set.
//   $symbol - byte value to classify; it is loaded into al.
// Overwrites gp_ax: it is zeroed first so that, with al holding the
// character, gp_ax is a clean table index.
// The returned token depends on tokentype($_operand):
//   TOKEN_FLAG - only a `test` of the table byte is emitted and ZF is returned.
//   otherwise  - gp_ax = table byte & CHAR_LETTER; the value is the matched
//                bit(s), not a normalized 0/1.
inline function IsLetter($symbol) {
// Class mask; zero-extend the character into gp_ax via al.
$C = CHAR_LETTER; gp_ax = 0; al = $symbol;
// Presumably an assemble-time choice based on how the call is used -- TODO confirm.
$if tokentype($_operand) == TOKEN_FLAG:
test CHAR_TABLE[gp_ax], $C;
// NOTE(review): x86 `test` sets ZF when the AND is zero, i.e. when no
// letter bit is set -- confirm callers expect this polarity.
$res = ZF;
$else
// Load the class byte over the character and keep only the mask bits.
al = CHAR_TABLE[gp_ax]; al &= $C;
$res = gp_ax;
$end;
$return $res;
}
// IsUpper($symbol): check whether the CHAR_TABLE entry for $symbol has the
// CHAR_UPPER bit set.
//   $symbol - byte value to classify; it is loaded into al.
// Overwrites gp_ax: it is zeroed first so that, with al holding the
// character, gp_ax is a clean table index.
// The returned token depends on tokentype($_operand):
//   TOKEN_FLAG - only a `test` of the table byte is emitted and ZF is returned.
//   otherwise  - gp_ax = table byte & CHAR_UPPER (nonzero when the bit is set).
inline function IsUpper($symbol) {
// Class mask; zero-extend the character into gp_ax via al.
$C = CHAR_UPPER; gp_ax = 0; al = $symbol;
// Presumably an assemble-time choice based on how the call is used -- TODO confirm.
$if tokentype($_operand) == TOKEN_FLAG:
test CHAR_TABLE[gp_ax], $C;
// NOTE(review): x86 `test` sets ZF when the AND is zero, i.e. when the
// class bit is NOT set -- confirm callers expect this polarity.
$res = ZF;
$else
// Load the class byte over the character and keep only the mask bits.
al = CHAR_TABLE[gp_ax]; al &= $C;
$res = gp_ax;
$end;
$return $res;
}
// IsLower($symbol): check whether the CHAR_TABLE entry for $symbol has the
// CHAR_LOWER bit set.
//   $symbol - byte value to classify; it is loaded into al.
// Overwrites gp_ax: it is zeroed first so that, with al holding the
// character, gp_ax is a clean table index.
// The returned token depends on tokentype($_operand):
//   TOKEN_FLAG - only a `test` of the table byte is emitted and ZF is returned.
//   otherwise  - gp_ax = table byte & CHAR_LOWER (nonzero when the bit is set).
inline function IsLower($symbol) {
// Class mask; zero-extend the character into gp_ax via al.
$C = CHAR_LOWER; gp_ax = 0; al = $symbol;
// Presumably an assemble-time choice based on how the call is used -- TODO confirm.
$if tokentype($_operand) == TOKEN_FLAG:
test CHAR_TABLE[gp_ax], $C;
// NOTE(review): x86 `test` sets ZF when the AND is zero, i.e. when the
// class bit is NOT set -- confirm callers expect this polarity.
$res = ZF;
$else
// Load the class byte over the character and keep only the mask bits.
al = CHAR_TABLE[gp_ax]; al &= $C;
$res = gp_ax;
$end;
$return $res;
}
// IsControl($symbol): check whether the CHAR_TABLE entry for $symbol has the
// CHAR_CONTROL bit set.
//   $symbol - byte value to classify; it is loaded into al.
// Overwrites gp_ax: it is zeroed first so that, with al holding the
// character, gp_ax is a clean table index.
// The returned token depends on tokentype($_operand):
//   TOKEN_FLAG - only a `test` of the table byte is emitted and ZF is returned.
//   otherwise  - gp_ax = table byte & CHAR_CONTROL (nonzero when the bit is set).
inline function IsControl($symbol) {
// Class mask; zero-extend the character into gp_ax via al.
$C = CHAR_CONTROL; gp_ax = 0; al = $symbol;
// Presumably an assemble-time choice based on how the call is used -- TODO confirm.
$if tokentype($_operand) == TOKEN_FLAG:
test CHAR_TABLE[gp_ax], $C;
// NOTE(review): x86 `test` sets ZF when the AND is zero, i.e. when the
// class bit is NOT set -- confirm callers expect this polarity.
$res = ZF;
$else
// Load the class byte over the character and keep only the mask bits.
al = CHAR_TABLE[gp_ax]; al &= $C;
$res = gp_ax;
$end;
$return $res;
}
// IsWhiteSpace($symbol): check whether the CHAR_TABLE entry for $symbol has
// the CHAR_WHITESPACE bit set (HT, LF, VT, FF, CR and SPACE in the table).
//   $symbol - byte value to classify; it is loaded into al.
// Overwrites gp_ax: it is zeroed first so that, with al holding the
// character, gp_ax is a clean table index.
// The returned token depends on tokentype($_operand):
//   TOKEN_FLAG - only a `test` of the table byte is emitted and ZF is returned.
//   otherwise  - gp_ax = table byte & CHAR_WHITESPACE (nonzero when the bit is set).
inline function IsWhiteSpace($symbol) {
// Class mask; zero-extend the character into gp_ax via al.
$C = CHAR_WHITESPACE; gp_ax = 0; al = $symbol;
// Presumably an assemble-time choice based on how the call is used -- TODO confirm.
$if tokentype($_operand) == TOKEN_FLAG:
test CHAR_TABLE[gp_ax], $C;
// NOTE(review): x86 `test` sets ZF when the AND is zero, i.e. when the
// class bit is NOT set -- confirm callers expect this polarity.
$res = ZF;
$else
// Load the class byte over the character and keep only the mask bits.
al = CHAR_TABLE[gp_ax]; al &= $C;
$res = gp_ax;
$end;
$return $res;
}
// IsPunct($symbol): check whether the CHAR_TABLE entry for $symbol has the
// CHAR_PUNCT bit set.
//   $symbol - byte value to classify; it is loaded into al.
// Overwrites gp_ax: it is zeroed first so that, with al holding the
// character, gp_ax is a clean table index.
// The returned token depends on tokentype($_operand):
//   TOKEN_FLAG - only a `test` of the table byte is emitted and ZF is returned.
//   otherwise  - gp_ax = table byte & CHAR_PUNCT (nonzero when the bit is set).
inline function IsPunct($symbol) {
// Class mask; zero-extend the character into gp_ax via al.
$C = CHAR_PUNCT; gp_ax = 0; al = $symbol;
// Presumably an assemble-time choice based on how the call is used -- TODO confirm.
$if tokentype($_operand) == TOKEN_FLAG:
test CHAR_TABLE[gp_ax], $C;
// NOTE(review): x86 `test` sets ZF when the AND is zero, i.e. when the
// class bit is NOT set -- confirm callers expect this polarity.
$res = ZF;
$else
// Load the class byte over the character and keep only the mask bits.
al = CHAR_TABLE[gp_ax]; al &= $C;
$res = gp_ax;
$end;
$return $res;
}
// GetCharType($symbol): return the full set of CHAR_* class bits stored in
// CHAR_TABLE for $symbol.
//   $symbol - byte value to classify; it is loaded into al.
// Returns gp_ax holding the table byte (upper bits cleared by gp_ax = 0).
// Overwrites gp_ax.
inline function GetCharType($symbol) {
// Zero-extend the character into gp_ax so it can index the table.
gp_ax = 0; al = $symbol;
// Replace the character with its class byte.
al = CHAR_TABLE[gp_ax];
$return gp_ax;
}
//
// Token types used internally by the Ziron Assembler.
// TOKEN_FLAG is the value the IsXxx macros compare tokentype($_operand) against.
//
#define TOKEN_EOF 2;
#define TOKEN_SYM 3;
#define TOKEN_IDENT 100;
#define TOKEN_NUMBER 200;
#define TOKEN_FLOAT 201;
#define TOKEN_SEGMENT 204;
#define TOKEN_REGISTER 205;
#define TOKEN_ST 206;
#define TOKEN_XMM 207;
#define TOKEN_BASEEXP 210;
#define TOKEN_ANSISTRING 300;
#define TOKEN_WIDESTRING 301;
#define TOKEN_FLAG 302;
#define TOKEN_EFLAG 303;
00000000 33C0 xor eax,eax
00000002 B035 mov al,0x35
00000004 8A8021000000 mov al,[eax+0x21]
0000000A 2401 and al,0x1
0000000C A321010000 mov [0x121],eax
00000011 33C0 xor eax,eax
00000013 B035 mov al,0x35
00000015 8A8021000000 mov al,[eax+0x21]
0000001B 2401 and al,0x1
0000001D 85C0 test eax,eax
0000001F 7400 jz 0x21
//............................................
// IsDigit($symbol), debug variant: same body as the IsDigit defined earlier
// in this file, plus a $echo of tokentype($_operand) to show which token
// type the $if below actually sees.
// NOTE(review): this redefines the name IsDigit; presumably only one of the
// two definitions is meant to be kept -- confirm before merging.
//   $symbol - byte value to classify; it is loaded into al.
// Overwrites gp_ax. Returns ZF in the TOKEN_FLAG case, otherwise
// gp_ax = table byte & CHAR_DIGIT.
inline function IsDigit($symbol) {
$echo tokentype($_operand); // debug: print the operand's token type
// Class mask; zero-extend the character into gp_ax via al.
$C = CHAR_DIGIT; gp_ax = 0; al = $symbol;
$if tokentype($_operand) == TOKEN_FLAG:
test CHAR_TABLE[gp_ax], $C;
// NOTE(review): x86 `test` sets ZF when the AND is zero, i.e. when the
// class bit is NOT set -- confirm callers expect this polarity.
$res = ZF;
$else
// Load the class byte over the character and keep only the mask bits.
al = CHAR_TABLE[gp_ax]; al &= $C;
$res = gp_ax;
$end;
$return $res;
}