/*
 * Copyright (c) Ian F. Darwin 1986-1995.
 * Software written by Ian F. Darwin and others;
 * maintained 1995-present by Christos Zoulas and others.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by Ian F. Darwin and others.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Names.h - names and types used by ascmagic in file(1).
 * These tokens are here because they can appear anywhere in
 * the first HOWMANY bytes, while tokens in MAGIC must
 * appear at fixed offsets into the file. Don't make HOWMANY
 * too high unless you have a very fast CPU.
 *
 * Id: names.h,v 1.21 2003/03/23 21:16:26 christos Exp
 */

/*
	modified by Chris Lowth - 9 April 2000
	to add mime type strings to the types table.
*/

/*
 * This header defines file-static tables, so including it twice in one
 * translation unit would redefine them; the guard makes that harmless.
 */
#ifndef NAMES_H
#define NAMES_H

#include <stddef.h>	/* NULL, used as the table terminators below */

/* these types are used to index the table 'types': keep em in sync! */
#define	L_C	0		/* first and foremost on UNIX */
#define	L_CC	1		/* Bjarne's postincrement */
#define	L_FORT	2		/* the oldest one */
#define	L_MAKE	3		/* Makefiles */
#define	L_PLI	4		/* PL/1 */
#define	L_MACH	5		/* some kinda assembler */
#define	L_ENG	6		/* English */
#define	L_PAS	7		/* Pascal */
#define	L_MAIL	8		/* Electronic mail */
#define	L_NEWS	9		/* Usenet Netnews */
#define	L_JAVA	10		/* Java code */
#define	L_HTML	11		/* HTML */
#define	L_BCPL	12		/* BCPL */
#define	L_M4	13		/* M4 */

/*
 * Table indexed by the L_* constants above.  Each entry pairs a
 * human-readable description with a MIME type string.  Entry L_M4 + 1
 * is a "can't happen" marker, and the table ends with a NULL/NULL
 * sentinel.
 */
/*@unchecked@*/ /*@unused@*/ /*@observer@*/
static const struct {
/*@observer@*/ /*@null@*/
	const char *human;	/* description text */
/*@observer@*/ /*@null@*/
	const char *mime;	/* MIME type string */
} types[] = {
	{ "C program",					"text/x-c" },
	{ "C++ program",				"text/x-c++" },
	{ "FORTRAN program",				"text/x-fortran" },
	{ "make commands",				"text/x-makefile" },
	{ "PL/1 program",				"text/x-pl1" },
	{ "assembler program",				"text/x-asm" },
	{ "English",					"text/plain, English" },
	{ "Pascal program",				"text/x-pascal" },
	{ "mail",					"text/x-mail" },
	{ "news",					"text/x-news" },
	{ "Java program",				"text/x-java" },
	{ "HTML document",				"text/html" },
	{ "BCPL program",				"text/x-bcpl" },
	{ "M4 macro language pre-processor",		"text/x-m4" },
	{ "can't happen error on names.h/types",	"error/x-error" },
	{ NULL,						NULL }
};

/*
 * XXX - how should we distinguish Java from C++?
 * The trick used in a Debian snapshot, of having "extends" or "implements"
 * as tags for Java, doesn't work very well, given that those keywords
 * are often preceded by "class", which flags it as C++.
 *
 * Perhaps we need to be able to say
 *
 *	If "class" then
 *
 *		if "extends" or "implements" then
 *			Java
 *		else
 *			C++
 *	endif
 *
 * Or should we use other keywords, such as "package" or "import"?
 * Unfortunately, Ada95 uses "package", and Modula-3 uses "import",
 * although I infer from the language spec at
 *
 *	http://www.research.digital.com/SRC/m3defn/html/m3.html
 *
 * that Modula-3 uses "IMPORT" rather than "import", i.e. it must be
 * in all caps.
 *
 * So, for now, we go with "import".  We must put it before the C++
 * stuff, so that we don't misidentify Java as C++.  Not using "package"
 * means we won't identify stuff that defines a package but imports
 * nothing; hopefully, very little Java code imports nothing (one of the
 * reasons for doing OO programming is to import as much as possible
 * and write only what you need to, right?).
 *
 * Unfortunately, "import" may cause us to misidentify English text
 * as Java, as it comes after "the" and "The".  Perhaps we need a fancier
 * heuristic to identify Java?
 */

/*
 * Token table: each entry maps a token string to one of the L_*
 * indices into 'types'.  Entry order is significant (see the ordering
 * notes inside the table).  The table ends with a {NULL, 0} sentinel
 * that NNAMES does not count.
 */
/*@unchecked@*/ /*@unused@*/ /*@observer@*/
static struct names {
/*@observer@*/ /*@null@*/
	const char *name;	/* token text; NULL only in the sentinel */
	short type;		/* L_* index into types[] */
} names[] = {
	/* These must be sorted by eye for optimal hit rate */
	/* Add to this list only after substantial meditation */
	{"dnl",		L_M4},
	{"import",	L_JAVA},
	{"\"libhdr\"",	L_BCPL},
	{"\"LIBHDR\"",	L_BCPL},
	{"//",		L_CC},
	{"template",	L_CC},
	{"virtual",	L_CC},
	{"class",	L_CC},
	{"public:",	L_CC},
	{"private:",	L_CC},
	{"/*",		L_C},	/* must precede "The", "the", etc. */
	{"#include",	L_C},
	{"char",	L_C},
	{"The",		L_ENG},
	{"the",		L_ENG},
	{"double",	L_C},
	{"extern",	L_C},
	{"float",	L_C},
	{"struct",	L_C},
	{"union",	L_C},
	{"CFLAGS",	L_MAKE},
	{"LDFLAGS",	L_MAKE},
	{"all:",	L_MAKE},
	{".PRECIOUS",	L_MAKE},
/* Too many files of text have these words in them.  Find another way
 * to recognize Fortrash.
 */
#ifdef	NOTDEF
	{"subroutine",	L_FORT},
	{"function",	L_FORT},
	{"block",	L_FORT},
	{"common",	L_FORT},
	{"dimension",	L_FORT},
	{"integer",	L_FORT},
	{"data",	L_FORT},
#endif	/*NOTDEF*/
	{".ascii",	L_MACH},
	{".asciiz",	L_MACH},
	{".byte",	L_MACH},
	{".even",	L_MACH},
	{".globl",	L_MACH},
	{".text",	L_MACH},
	{"clr",		L_MACH},
	{"(input,",	L_PAS},
	{"dcl",		L_PLI},
	{"Received:",	L_MAIL},
	{">From",	L_MAIL},
	{"Return-Path:",L_MAIL},
	{"Cc:",		L_MAIL},
	{"Newsgroups:",	L_NEWS},
	{"Path:",	L_NEWS},
	{"Organization:",L_NEWS},
	{"href=",	L_HTML},
	{"HREF=",	L_HTML},
	{"<body",	L_HTML},
	{"<BODY",	L_HTML},
	{NULL,		0}
};

/* Number of real entries in names[], i.e. excluding the trailing sentinel. */
#define NNAMES ((sizeof(names) / sizeof(names[0])) - 1)

#endif /* NAMES_H */