String
The definition of TString
is
/*
** Header for string value; string bytes follow the end of this structure
** (aligned according to 'UTString'; see next).
*/
typedef struct TString {
CommonHeader;
lu_byte extra; /* reserved words for short strings; "has hash" for longs */
lu_byte shrlen; /* length for short strings */
unsigned int hash;
union {
size_t lnglen; /* length for long strings */
struct TString *hnext; /* linked list for hash table */
} u;
} TString;
/*
** Ensures that address after this type is always fully aligned.
*/
typedef union UTString {
L_Umaxalign dummy; /* ensures maximum alignment for strings */
TString tsv;
} UTString;
/* type to ensure maximum alignment */
typedef union {
lua_Number n;
double u;
void *s;
lua_Integer i;
long l;
} L_Umaxalign;
Note:
TString
contains only the metadata of a string. There's no actual string body member defined in the struct.- There are two types of
TString
: long string and short string. UTString
is defined to ensure maximum alignment.
Creating a String
To create a string, first, we check if the string already exist in the global string cache. Only create new string when it's not in the cache.
TString *luaS_new (lua_State *L, const char *str) {
unsigned int i = point2uint(str) % STRCACHE_N; /* hash */
int j;
TString **p = G(L)->strcache[i];
for (j = 0; j < STRCACHE_M; j++) {
if (strcmp(str, getstr(p[j])) == 0) /* hit? */
return p[j]; /* that is it */
}
/* normal route */
for (j = STRCACHE_M - 1; j > 0; j--)
p[j] = p[j - 1]; /* move out last element */
/* new element is first in the list */
p[0] = luaS_newlstr(L, str, strlen(str));
return p[0];
}
If the length of the string is greater than LUAI_MAXSHORTLEN
, then it's a long string, else it's a short string
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
if (l <= LUAI_MAXSHORTLEN) /* short string? */
return internshrstr(L, str, l);
else {
TString *ts;
if (l >= (MAX_SIZE - sizeof(TString))/sizeof(char))
luaM_toobig(L);
ts = luaS_createlngstrobj(L, l);
memcpy(getstr(ts), str, l * sizeof(char));
return ts;
}
}
It's straight forward to create a long string.
TString *luaS_createlngstrobj (lua_State *L, size_t l) {
TString *ts = createstrobj(L, l, LUA_TLNGSTR, G(L)->seed);
ts->u.lnglen = l;
return ts;
}
static TString *createstrobj (lua_State *L, size_t l, int tag, unsigned int h) {
TString *ts;
GCObject *o;
size_t totalsize; /* total size of TString object */
totalsize = sizelstring(l);
o = luaC_newobj(L, tag, totalsize);
ts = gco2ts(o);
ts->hash = h;
ts->extra = 0;
getstr(ts)[l] = '\0'; /* ending 0 */
return ts;
}
#define sizelstring(l) (sizeof(union UTString) + ((l) + 1) * sizeof(char))
Previously, we've noticed that TString
does not store the string body as a struct member. Where does the string body actually go?
When we create a string, we allocate a slice of memory with length sizelstring(l)
, which is the size of TString
and the size of the string body. This is a technique called flexible array member. The technique allows us to store the actual string body in the memory space following the TString
.
note: C99 standardized flexible array member syntax. However, Lua stuck to ISO C for portability.
On the other hand, Short strings will be stored in the string table strt
in the global state because:
strt
is a hash table, which allows us to compare short strings efficiently with only their hash.- Avoid creating duplicated short strings.
- They can be managed with a separated
GCObject
linked-list, which makes the GC process more efficient.
#define sizelstring(l) (sizeof(union UTString) + ((l) + 1) * sizeof(char))
static TString *internshrstr (lua_State *L, const char *str, size_t l) {
TString *ts;
global_State *g = G(L);
unsigned int h = luaS_hash(str, l, g->seed);
TString **list = &g->strt.hash[lmod(h, g->strt.size)];
lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */
for (ts = *list; ts != NULL; ts = ts->u.hnext) {
if (l == ts->shrlen &&
(memcmp(str, getstr(ts), l * sizeof(char)) == 0)) {
/* found! */
if (isdead(g, ts)) /* dead (but not collected yet)? */
changewhite(ts); /* resurrect it */
return ts;
}
}
if (g->strt.nuse >= g->strt.size && g->strt.size <= MAX_INT/2) {
luaS_resize(L, g->strt.size * 2);
list = &g->strt.hash[lmod(h, g->strt.size)]; /* recompute with new size */
}
ts = createstrobj(L, l, LUA_TSHRSTR, h);
memcpy(getstr(ts), str, l * sizeof(char));
ts->shrlen = cast_byte(l);
ts->u.hnext = *list;
*list = ts;
g->strt.nuse++;
return ts;
}