/*
  Copyright Dave Bone 1998 - 2014 
  All Rights Reserved. 
  No part of this document may be reproduced without written consent from the author.
	
FILE:     identifier.lex
dates:    17 Juin 2003	
Purpose:  yacco2 identifier
Returned: T_identifier
Mod: include keyword handling that can have embedded "-" - 23 Mar. 2005
Note: "-" in Rminus_la. This protects the a-b expression in the 
parallel-la-boundary from being considered bad.
Take 2: use a bktrk strategy. If keyword then return it else see if
the suffix of "-" needs dropping to produce a shorter identifier.
*/
/@
@i "/usr/local/yacco2/copyright.w"
@** |identifier| thread.\fbreak
Does three things:\fbreak
1) builds up strings starting with a letter 
followed by alphanumerics, underscores and hyphens\fbreak
2) determines whether it is an identifier, keyword, 
or xxx-in-stbl where xxx is one of rule, T.\fbreak
3) watches for the possibility of a shortened 
identifier within the lookahead expression.\fbreak
This is caused by the hyphen being overloaded: within 
the lookahead expression it
is an operator. 
The default is the longest possible identifier string 
which could be a keyword.
Barring this, it is an identifier up to but excluding
the first hyphen. This is not pure in its error checking, 
as the string could be in error within the 
lookahead expression, and the faulty keyword would then 
be checked as the shortened
identifier.
Well, the undefined identifier should be caught
when the lookahead expression is evaluated in postfix form against the symbol table.

To keep each drawing contained to a page, i broke the rules
having many subrules like
|RUPPER| into smaller rules: |RUPPER_A_M| and |RUPPER_N_Z|.

An Optimization:\fbreak
Bypass the PDA of the grammar and use a FA for the 
tail-gating characters. Dtrace is good at exposing optimization opportunities.
2) Optimize: get rid of the use of string to build up the symbol name: use a raw buffer. 
Dtrace showed its inefficiencies: strlen, memcpy, and mutex locks.\fbreak
Nov. 2008.\fbreak
@/
fsm	
(fsm-id	"identifier.lex",fsm-filename identifier,fsm-namespace NS_identifier
,fsm-class		Cidentifier{
  user-prefix-declaration
#include "yacco2_stbl.h"
  ***
  user-declaration
    public:
    // State shared between the rules of this grammar while one string is built up.
    // Token position of the first hyphen met in the suffix; -1 when none.
    // Handed to override_current_token when the identifier gets shortened.
    int hyphen_pos_;
    // Index inside ddd_ where the first hyphen is stored; -1 when none.
    // The string is cut at this index when the hyphenated form is not in the symbol table.
    int hyphen_idx_;
    // Symbol of the first hyphen met; 0 when the string holds no hyphen.
    CAbs_lr1_sym* hyphen_;
    // Raw buffer holding the text built so far, kept NUL terminated.
    char ddd_[1024];
    // Index of the next free slot in ddd_, which is also the current text length.
    int ddd_idx_;
  ***  
  op
    // Put the hyphen tracking and the text buffer back to their empty state.
    // NOTE(review): presumably run at the start of each parse by this fsm - confirm.
    hyphen_pos_ = -1;
    hyphen_ = 0;
    hyphen_idx_ = -1;
    ddd_idx_ = 0;
    ddd_[ddd_idx_] = 0;
 ***
  constructor
    // Same empty state as set by the op code above.
    hyphen_pos_ = -1;
    hyphen_ = 0;
    hyphen_idx_ = -1;
    ddd_idx_ = 0;
    ddd_[ddd_idx_] = 0;
 ***
  }
,fsm-version "1.0",fsm-date "17 Juin 2003",fsm-debug "false"
,fsm-comments	"Yacco2 identifiers lexer with symbol table lookup.")
parallel-parser	
(	
  parallel-thread-function
    TH_identifier
  ***
  parallel-la-boundary
    eolr
  ***
)
@"/usr/local/yacco2/compiler/grammars/yacco2_T_includes.T"

rules{
Ridentifier	 (
lhs {
/@
Look the built up string up in the symbol table, longest form first.
If it is found, return either the table entry as is (a rule or T already 
in the table) or a newly created keyword terminal.
If it is not found and a hyphen was seen, backtrack: the first hyphen becomes
the current token again and the lookup is retried on the prefix in front of it.
Failing all, the string is returned as an identifier.
@/
  op
    using namespace NS_yacco2_T_enum;
    using namespace NS_yacco2_terminals;
    using namespace yacco2_stbl;
    Cidentifier* fsm = (Cidentifier*) rule_info__.parser__->fsm_tbl__;
    CAbs_lr1_sym* sym(0);
    T_sym_tbl_report_card report_card;

    // 1st attempt: the longest string, hyphens included
    find_sym_in_stbl(report_card,*fsm->ddd_);
    bool in_stbl = (report_card.action_ == T_sym_tbl_report_card::fnd);

    if((in_stbl == false) && (fsm->hyphen_ != 0)){
      // strip out - suffix as in a-b where suffix -b is bktracked:
      // re-align to - as lookahead and cut the text in front of it
      rule_info__.parser__->override_current_token(*fsm->hyphen_,fsm->hyphen_pos_);
      fsm->ddd_[fsm->hyphen_idx_] = 0;
      find_sym_in_stbl(report_card,*fsm->ddd_);// relook up id without "-" sufx
      in_stbl = (report_card.action_ == T_sym_tbl_report_card::fnd);
    }

    if(in_stbl == false){// unknown to the symbol table: a plain identifier
      sym = new T_identifier((const char*)&fsm->ddd_);
      sym->set_rc(*rule_info__.parser__->start_token__,__FILE__,__LINE__);
      RSVP(sym);
      return;
    }

    if(report_card.tbl_entry_->type_ != table_entry::keyword){
      // return xxx-in-stbl where xxx one of rule, T
      sym = report_card.tbl_entry_->symbol_; 
      sym->set_rc(*rule_info__.parser__->start_token__,__FILE__,__LINE__);
      RSVP(sym);
      return;      
    }  

    // keyword: hand back a fresh terminal of the same kind as the table entry
    kw_in_stbl* kw_in = (kw_in_stbl*)report_card.tbl_entry_->symbol_;
    CAbs_lr1_sym* kw =  kw_in->keyword_in_stbl();
    CAbs_lr1_sym* nkw(0);// stays 0 when the keyword id has no case below
    switch(kw->enumerated_id__){
      case T_Enum::T_T_raw_characters_:{nkw = new T_raw_characters;break;}  
      case T_Enum::T_T_lr1_constant_symbols_:
	{nkw = new T_lr1_constant_symbols;break;}  
      case T_Enum::T_T_error_symbols_:{nkw = new T_error_symbols;break;}  
      case T_Enum::T_T_eocode_:{nkw = new T_eocode;break;}  

      case T_Enum::T_T_AD_:{nkw = new T_AD;break;}  
      case T_Enum::T_T_AB_:{nkw = new T_AB;break;}  
      case T_Enum::T_T_parallel_la_boundary_:
	{nkw = new T_parallel_la_boundary;break;}  
      case T_Enum::T_T_arbitrator_code_:{nkw = new T_arbitrator_code;break;}  

      case T_Enum::T_T_parallel_parser_:{nkw = new T_parallel_parser;break;}  
      case T_Enum::T_T_parallel_thread_function_:
	{nkw = new T_parallel_thread_function;break;}  
      case T_Enum::T_T_parallel_control_monitor_:
	{nkw = new T_parallel_control_monitor;break;}  
      case T_Enum::T_T_fsm_:{nkw = new T_fsm;break;}  
      case T_Enum::T_T_fsm_id_:{nkw = new T_fsm_id;break;}  
      case T_Enum::T_T_fsm_filename_:{nkw = new T_fsm_filename;break;}  
      case T_Enum::T_T_fsm_namespace_:{nkw = new T_fsm_namespace;break;}  
      case T_Enum::T_T_fsm_class_:{nkw = new T_fsm_class;break;}  
      case T_Enum::T_T_fsm_version_:{nkw = new T_fsm_version;break;}  
      case T_Enum::T_T_fsm_date_:{nkw = new T_fsm_date;break;}  
      case T_Enum::T_T_fsm_debug_:{nkw = new T_fsm_debug;break;}  
      case T_Enum::T_T_fsm_comments_:{nkw = new T_fsm_comments;break;}  
      case T_Enum::T_T_terminals_:{nkw = new T_terminals;break;}  
      case T_Enum::T_T_enumeration_:{nkw = new T_enumeration;break;}  
      case T_Enum::T_T_file_name_:{nkw = new T_file_name;break;}  
      case T_Enum::T_T_name_space_:{nkw = new T_name_space;break;}  
      case T_Enum::T_T_sym_class_:{nkw = new T_sym_class;break;}  
      case T_Enum::T_T_rules_:{nkw = new T_rules;break;}  
      case T_Enum::T_T_lhs_:{nkw = new T_lhs;break;}  
      case T_Enum::T_T_user_declaration_:{nkw = new T_user_declaration;break;}  
      case T_Enum::T_T_user_prefix_declaration_:
	{nkw = new T_user_prefix_declaration;break;}  
      case T_Enum::T_T_user_suffix_declaration_:
	{nkw = new T_user_suffix_declaration;break;}  
      case T_Enum::T_T_constructor_:{nkw = new T_constructor;break;}  
      case T_Enum::T_T_destructor_:{nkw = new T_destructor;break;}  
      case T_Enum::T_T_op_:{nkw = new T_op;break;}  
      case T_Enum::T_T_failed_:{nkw = new T_failed;break;}  
      case T_Enum::T_T_user_implementation_:
	{nkw = new T_user_implementation;break;}  
      case T_Enum::T_T_user_imp_tbl_:{nkw = new T_user_imp_tbl;break;}  
      case T_Enum::T_T_user_imp_sym_:{nkw = new T_user_imp_sym;break;}  
      case T_Enum::T_T_constant_defs_:{nkw = new T_constant_defs;break;}  
      case T_Enum::T_T_terminals_refs_:{nkw = new T_terminals_refs;break;}  
      case T_Enum::T_T_terminals_sufx_:{nkw = new T_terminals_sufx;break;}  
      case T_Enum::T_T_lrk_sufx_:{nkw = new T_lrk_sufx;break;}  
      case T_Enum::T_LR1_eog_:{nkw = new LR1_eog;break;}  
      case T_Enum::T_LR1_eolr_:{nkw = new LR1_eolr;break;}  
      case T_Enum::T_T_NULL_:{nkw = new T_NULL;break;} 
      default:{break;}// keyword kind unknown to this switch: handled below
    }
    if(nkw == 0){
      // A keyword entry whose id has no case above used to leave nkw
      // uninitialized and then dereference it. Hand the text back as an
      // identifier instead so the calling grammar can reject it cleanly.
      nkw = new T_identifier((const char*)&fsm->ddd_);
    }
    nkw->set_rc(*rule_info__.parser__->start_token__,__FILE__,__LINE__);
    RSVP(nkw);
    return;      
  ***
  }
){
  -> Rstart_char  
  -> Rstart_char Rid_suffix  
}

Rid_suffix	 () {
  -> |+| {
/@
Pure finite automata for identifier suffix.\fbreak
Capture the index of where the first hyphen is.
As length is relative to one, this is its index 
before i add it to the being built up string.
i could have added the character before and then
used the length minus one to get its index:
ahh the off by one count as the array operator is 
of course relative to zero.
Dave u and your rants...and rolls.

Note the 1st entry character into this rule is on the stack
which is what the stack frame uses to set the specific symbols.
The current token for the parser is now the lookahead character.
This is why I reset the parsing character upon entry 
to the previous token.

The raw buffer is of fixed size: characters that do not fit are
still consumed but are left out of the built up string.
@/
    op
      Cidentifier* fsm = (Cidentifier*) rule_info__.parser__->fsm_tbl__;
      CAbs_lr1_sym* sym = sf->p1__;
      parser()->reset_current_token(parser()->current_token_pos()-1);// bk trk to previous chr which is on stack
      // highest index allowed to hold a character: the last slot is kept for the NUL
      const int last_chr_idx = (int)sizeof(fsm->ddd_) - 2;
      for(;;){
        const int id = sym->enumerated_id();
        const bool is_lower = (id >= NS_yacco2_T_enum::T_Enum::T_raw_a_) 
                                &&
                              (id <= NS_yacco2_T_enum::T_Enum::T_raw_z_);
        const bool is_upper = (id >= NS_yacco2_T_enum::T_Enum::T_raw_A_) 
                                &&
                              (id <= NS_yacco2_T_enum::T_Enum::T_raw_Z_);
        const bool is_digit = (id >= NS_yacco2_T_enum::T_Enum::T_raw_0_) 
                                &&
                              (id <= NS_yacco2_T_enum::T_Enum::T_raw_9_);
        const bool is_under_score = (id == NS_yacco2_T_enum::T_Enum::T_raw_under_score_);
        const bool is_hyphen = (id == NS_yacco2_T_enum::T_Enum::T_raw_minus_);

        if(!(is_lower || is_upper || is_digit || is_under_score || is_hyphen)){
          return; // end-of-identifier
        }

        if(is_hyphen && (fsm->hyphen_ == 0)){// remember only the 1st hyphen
          fsm->hyphen_pos_ = rule_info__.parser__->current_token_pos__-1;
          fsm->hyphen_ = sym;
          // note: len rel 1, idx rel 0: "-" not added yet to str, this will be its idx value
          fsm->hyphen_idx_ = fsm->ddd_idx_;
        }

        // bld str: guarded so that an over long identifier cannot write past
        // the end of the buffer; the excess characters are dropped.
        if(fsm->ddd_idx_ <= last_chr_idx){
          fsm->ddd_[fsm->ddd_idx_] = sym->id__[0];
          ++fsm->ddd_idx_;
          fsm->ddd_[fsm->ddd_idx_] = 0;
        }

        parser()->get_next_token();// as current token
        sym = parser()->current_token__;
      }
    ***
    } 
}

Rstart_char	 ()  {
  -> RUPPER_A_M  // 1st character of the string: upper case A to M
  -> RUPPER_N_Z  // upper case N to Z
  -> Rlower_a_m  // lower case a to m
  -> Rlower_n_z  // lower case n to z: no digit, underscore or hyphen can start the string
}

RUPPER_A_M   (
lhs {
  op
    // Append the character of the symbol found on the parse stack at
    // top_sub__ - 1 (presumably the A..M letter just matched) to the
    // fsm text buffer and keep the buffer NUL terminated.
    Cidentifier* const id_fsm = (Cidentifier*) rule_info__.parser__->fsm_tbl__;
    size_t stack_idx = rule_info__.parser__->parse_stack__.top_sub__ - 1;
    CAbs_lr1_sym* const chr_sym = rule_info__.parser__->get_spec_stack_token(stack_idx);
    id_fsm->ddd_[id_fsm->ddd_idx_++] = chr_sym->id__[0];
    id_fsm->ddd_[id_fsm->ddd_idx_] = 0;
  ***
  }
){
  -> A -> B -> C -> D -> E -> F -> G -> H -> I -> J -> K -> L -> M 
}

RUPPER_N_Z   (
lhs {
  op
    // Append the character of the symbol found on the parse stack at
    // top_sub__ - 1 (presumably the N..Z letter just matched) to the
    // fsm text buffer and keep the buffer NUL terminated.
    Cidentifier* const id_fsm = (Cidentifier*) rule_info__.parser__->fsm_tbl__;
    size_t stack_idx = rule_info__.parser__->parse_stack__.top_sub__ - 1;
    CAbs_lr1_sym* const chr_sym = rule_info__.parser__->get_spec_stack_token(stack_idx);
    id_fsm->ddd_[id_fsm->ddd_idx_++] = chr_sym->id__[0];
    id_fsm->ddd_[id_fsm->ddd_idx_] = 0;
  ***
  }
){
  -> N -> O -> P -> Q 
  -> "R" // considered a Rule when not quoted!
  -> S -> T -> U -> V -> W -> X -> Y -> Z
}

Rlower_a_m   (
lhs {
  op
    // Append the character of the symbol found on the parse stack at
    // top_sub__ - 1 (presumably the a..m letter just matched) to the
    // fsm text buffer and keep the buffer NUL terminated.
    Cidentifier* const id_fsm = (Cidentifier*) rule_info__.parser__->fsm_tbl__;
    size_t stack_idx = rule_info__.parser__->parse_stack__.top_sub__ - 1;
    CAbs_lr1_sym* const chr_sym = rule_info__.parser__->get_spec_stack_token(stack_idx);
    id_fsm->ddd_[id_fsm->ddd_idx_++] = chr_sym->id__[0];
    id_fsm->ddd_[id_fsm->ddd_idx_] = 0;
  ***
  }
){
  -> a -> b -> c -> d -> e -> f -> g -> h -> i -> j -> k -> l -> m 
}

Rlower_n_z   (
lhs {
  op
    // Append the character of the symbol found on the parse stack at
    // top_sub__ - 1 (presumably the n..z letter just matched) to the
    // fsm text buffer and keep the buffer NUL terminated.
    Cidentifier* const id_fsm = (Cidentifier*) rule_info__.parser__->fsm_tbl__;
    size_t stack_idx = rule_info__.parser__->parse_stack__.top_sub__ - 1;
    CAbs_lr1_sym* const chr_sym = rule_info__.parser__->get_spec_stack_token(stack_idx);
    id_fsm->ddd_[id_fsm->ddd_idx_++] = chr_sym->id__[0];
    id_fsm->ddd_[id_fsm->ddd_idx_] = 0;
  ***
  }
){
  -> n -> o -> p -> q -> r -> s -> t -> u -> v -> w -> x -> y -> z
}

RNUMBERS   (
lhs {
  op
    // Append the character of the symbol found on the parse stack at
    // top_sub__ - 1 (presumably the digit just matched) to the
    // fsm text buffer and keep the buffer NUL terminated.
    // NOTE(review): no rule visible in this grammar refers to RNUMBERS.
    Cidentifier* const id_fsm = (Cidentifier*) rule_info__.parser__->fsm_tbl__;
    size_t stack_idx = rule_info__.parser__->parse_stack__.top_sub__ - 1;
    CAbs_lr1_sym* const chr_sym = rule_info__.parser__->get_spec_stack_token(stack_idx);
    id_fsm->ddd_[id_fsm->ddd_idx_++] = chr_sym->id__[0];
    id_fsm->ddd_[id_fsm->ddd_idx_] = 0;
  ***
  }
){
  -> 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9
}
}// end of rules