Index: ext/standard/scanf.c
===================================================================
RCS file: /repository/php-src/ext/standard/scanf.c,v
retrieving revision 1.35
diff -u -p -d -r1.35 scanf.c
--- ext/standard/scanf.c 19 Sep 2006 10:38:31 -0000 1.35
+++ ext/standard/scanf.c 25 Dec 2006 14:12:38 -0000
@@ -119,6 +119,17 @@ typedef struct CharSet {
} *ranges;
} CharSet;
+typedef struct u_CharSet {
+ int exclude; /* 1 if this is an exclusion set. */
+ int nchars;
+ UChar *chars;
+ int nranges;
+ struct u_Range {
+ UChar start;
+ UChar end;
+ } *ranges;
+} u_CharSet;
+
/*
* Declarations for functions used only in this file.
*/
@@ -126,6 +137,9 @@ typedef struct CharSet {
static char *BuildCharSet(CharSet *cset, char *format);
static int CharInSet(CharSet *cset, int ch);
static void ReleaseCharSet(CharSet *cset);
+static UChar *u_BuildCharSet(u_CharSet *cset, UChar *format);
+static int u_CharInSet(u_CharSet *cset, UChar ch);
+static void u_ReleaseCharSet(u_CharSet *cset);
static inline void scan_set_error_return(int numVars, zval **return_value);
@@ -237,6 +251,114 @@ static char * BuildCharSet(CharSet *cset
}
/* }}} */
+/* {{{ u_BuildCharSet
+ *----------------------------------------------------------------------
+ *
+ * BuildCharSet --
+ *
+ * This function examines a character set format specification
+ * and builds a CharSet containing the individual characters and
+ * character ranges specified.
+ *
+ * Results:
+ * Returns the next format position.
+ *
+ * Side effects:
+ * Initializes the charset.
+ *
+ *----------------------------------------------------------------------
+ */
+static UChar * u_BuildCharSet(u_CharSet *cset, UChar *format)
+{
+ UChar *ch, start;
+ int nranges;
+ UChar *end;
+
+ memset(cset, 0, sizeof(u_CharSet));
+
+ ch = format;
+ if (*ch == 0x5E /* '^' */) {
+ cset->exclude = 1;
+ ch = ++format;
+ }
+ end = format + 1; /* verify this - cc */
+
+ /*
+ * Find the close bracket so we can overallocate the set.
+ */
+
+ if (*ch == 0x5D /* ']' */) {
+ ch = end++;
+ }
+ nranges = 0;
+ while (*ch != 0x5D /* ']' */) {
+ if (*ch == 0x2D /* '-' */) {
+ nranges++;
+ }
+ ch = end++;
+ }
+
+ cset->chars = safe_emalloc(sizeof(UChar), (end - format - 1), 0);
+ if (nranges > 0) {
+ cset->ranges = (struct u_Range *) safe_emalloc(sizeof(struct u_Range), nranges, 0);
+ } else {
+ cset->ranges = NULL;
+ }
+
+ /*
+ * Now build the character set.
+ */
+
+ cset->nchars = cset->nranges = 0;
+ ch = format++;
+ start = *ch;
+ if (*ch == 0x5D /* ']' */ || *ch == 0x2D /* '-' */) {
+ cset->chars[cset->nchars++] = *ch;
+ ch = format++;
+ }
+ while (*ch != 0x5D /* ']' */) {
+ if (*format == 0x2D /* '-' */) {
+ /*
+ * This may be the first character of a range, so don't add
+ * it yet.
+ */
+
+ start = *ch;
+ } else if (*ch == 0x2D /* '-' */) {
+ /*
+ * Check to see if this is the last character in the set, in which
+ * case it is not a range and we should add the previous character
+ * as well as the dash.
+ */
+
+ if (*format == 0x5D /* ']' */) {
+ cset->chars[cset->nchars++] = start;
+ cset->chars[cset->nchars++] = *ch;
+ } else {
+ ch = format++;
+
+ /*
+ * Check to see if the range is in reverse order.
+ */
+
+ if (start < *ch) {
+ cset->ranges[cset->nranges].start = start;
+ cset->ranges[cset->nranges].end = *ch;
+ } else {
+ cset->ranges[cset->nranges].start = *ch;
+ cset->ranges[cset->nranges].end = start;
+ }
+ cset->nranges++;
+ }
+ } else {
+ cset->chars[cset->nchars++] = *ch;
+ }
+ ch = format++;
+ }
+ return format;
+}
+/* }}} */
+
/* {{{ CharInSet
*----------------------------------------------------------------------
*
@@ -276,6 +398,45 @@ static int CharInSet(CharSet *cset, int
}
/* }}} */
+/* {{{ u_CharInSet
+ *----------------------------------------------------------------------
+ *
+ * CharInSet --
+ *
+ * Check to see if a character matches the given set.
+ *
+ * Results:
+ * Returns non-zero if the character matches the given set.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+static int u_CharInSet(u_CharSet *cset, UChar c)
+{
+ UChar ch = c;
+ int i, match = 0;
+
+ for (i = 0; i < cset->nchars; i++) {
+ if (cset->chars[i] == ch) {
+ match = 1;
+ break;
+ }
+ }
+ if (!match) {
+ for (i = 0; i < cset->nranges; i++) {
+ if ((cset->ranges[i].start <= ch)
+ && (ch <= cset->ranges[i].end)) {
+ match = 1;
+ break;
+ }
+ }
+ }
+ return (cset->exclude ? !match : match);
+}
+/* }}} */
+
/* {{{ ReleaseCharSet
*----------------------------------------------------------------------
*
@@ -300,6 +461,30 @@ static void ReleaseCharSet(CharSet *cset
}
/* }}} */
+/* {{{ u_ReleaseCharSet
+ *----------------------------------------------------------------------
+ *
+ * ReleaseCharSet --
+ *
+ * Free the storage associated with a character set.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+static void u_ReleaseCharSet(u_CharSet *cset)
+{
+ efree(cset->chars);
+ if (cset->ranges) {
+ efree(cset->ranges);
+ }
+}
+/* }}} */
+
/* {{{ ValidateFormat
*----------------------------------------------------------------------
*
@@ -579,6 +764,285 @@ error:
}
/* }}} */
+/* {{{ u_ValidateFormat
+ *----------------------------------------------------------------------
+ *
+ * ValidateFormat --
+ *
+ * Parse the format string and verify that it is properly formed
+ * and that there are exactly enough variables on the command line.
+ *
+ * Results:
+ * FAILURE or SUCCESS.
+ *
+ * Side effects:
+ * May set php_error based on abnormal conditions.
+ *
+ * Parameters :
+ * format The format string.
+ * numVars The number of variables passed to the scan command.
+ * totalSubs The number of variables that will be required.
+ *
+ *----------------------------------------------------------------------
+*/
+PHPAPI int u_ValidateFormat(UChar *format, int numVars, int *totalSubs)
+{
+#define STATIC_LIST_SIZE 16
+ int gotXpg, gotSequential, value, i, flags;
+ UChar *end, *ch = NULL;
+ int staticAssign[STATIC_LIST_SIZE];
+ int *nassign = staticAssign;
+ int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
+ TSRMLS_FETCH();
+
+ /*
+ * Initialize an array that records the number of times a variable
+ * is assigned to by the format string. We use this to detect if
+ * a variable is multiply assigned or left unassigned.
+ */
+
+ if (numVars > nspace) {
+ nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
+ nspace = numVars;
+ }
+ for (i = 0; i < nspace; i++) {
+ nassign[i] = 0;
+ }
+
+ xpgSize = objIndex = gotXpg = gotSequential = 0;
+
+ while (*format != 0x00) {
+ ch = format++;
+ flags = 0;
+
+ if (*ch != 0x25 /* '%' */) {
+ continue;
+ }
+ ch = format++;
+ if (*ch == 0x25 /* '%' */) {
+ continue;
+ }
+ if (*ch == 0x2A /* '*' */) {
+ flags |= SCAN_SUPPRESS;
+ ch = format++;
+ goto xpgCheckDone;
+ }
+
+ if ( u_isdigit( *ch ) ) {
+ /*
+ * Check for an XPG3-style %n$ specification. Note: there
+ * must not be a mixture of XPG3 specs and non-XPG3 specs
+ * in the same format string.
+ */
+
+ value = zend_u_strtoul(format-1, &end, 10);
+ if (*end != '$') {
+ goto notXpg;
+ }
+ format = end+1;
+ ch = format++;
+ gotXpg = 1;
+ if (gotSequential) {
+ goto mixedXPG;
+ }
+ objIndex = value - 1;
+ if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
+ goto badIndex;
+ } else if (numVars == 0) {
+ /*
+ * In the case where no vars are specified, the user can
+ * specify %9999$ legally, so we have to consider special
+ * rules for growing the assign array. 'value' is
+ * guaranteed to be > 0.
+ */
+
+ /* set a lower artificial limit on this
+ * in the interest of security and resource friendliness
+ * 255 arguments should be more than enough. - cc
+ */
+ if (value > SCAN_MAX_ARGS) {
+ goto badIndex;
+ }
+
+ xpgSize = (xpgSize > value) ? xpgSize : value;
+ }
+ goto xpgCheckDone;
+ }
+
+ notXpg:
+ gotSequential = 1;
+ if (gotXpg) {
+ mixedXPG:
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
+ goto error;
+ }
+
+ xpgCheckDone:
+ /*
+ * Parse any width specifier.
+ */
+
+ if (u_isdigit(*ch)) {
+ value = zend_u_strtoul(format-1, &format, 10);
+ flags |= SCAN_WIDTH;
+ ch = format++;
+ }
+
+ /*
+ * Ignore size specifier.
+ */
+
+ if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
+ ch = format++;
+ }
+
+ if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
+ goto badIndex;
+ }
+
+ /*
+ * Handle the various field types.
+ */
+
+ switch (*ch) {
+ case 'n':
+ case 'd':
+ case 'D':
+ case 'i':
+ case 'o':
+ case 'x':
+ case 'X':
+ case 'u':
+ case 'f':
+ case 'e':
+ case 'E':
+ case 'g':
+ case 's':
+ break;
+ case 'c':
+ /* we differ here with the TCL implementation in allowing for */
+ /* a character width specification, to be more consistent with */
+ /* ANSI. since Zend auto allocates space for vars, this is no */
+ /* problem - cc */
+ /*
+ if (flags & SCAN_WIDTH) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion");
+ goto error;
+ }
+ */
+ break;
+ case '[':
+ if (*format == '\0') {
+ goto badSet;
+ }
+ ch = format++;
+ if (*ch == '^') {
+ if (*format == '\0') {
+ goto badSet;
+ }
+ ch = format++;
+ }
+ if (*ch == ']') {
+ if (*format == '\0') {
+ goto badSet;
+ }
+ ch = format++;
+ }
+ while (*ch != ']') {
+ if (*format == '\0') {
+ goto badSet;
+ }
+ ch = format++;
+ }
+ break;
+ badSet:
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string");
+ goto error;
+ default:
+ {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
+ goto error;
+ }
+ }
+ if (!(flags & SCAN_SUPPRESS)) {
+ if (objIndex >= nspace) {
+ /*
+ * Expand the nassign buffer. If we are using XPG specifiers,
+ * make sure that we grow to a large enough size. xpgSize is
+ * guaranteed to be at least one larger than objIndex.
+ */
+ value = nspace;
+ if (xpgSize) {
+ nspace = xpgSize;
+ } else {
+ nspace += STATIC_LIST_SIZE;
+ }
+ if (nassign == staticAssign) {
+ nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
+ for (i = 0; i < STATIC_LIST_SIZE; ++i) {
+ nassign[i] = staticAssign[i];
+ }
+ } else {
+ nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
+ }
+ for (i = value; i < nspace; i++) {
+ nassign[i] = 0;
+ }
+ }
+ nassign[objIndex]++;
+ objIndex++;
+ }
+ } /* while (*format != '\0') */
+
+ /*
+ * Verify that all of the variable were assigned exactly once.
+ */
+
+ if (numVars == 0) {
+ if (xpgSize) {
+ numVars = xpgSize;
+ } else {
+ numVars = objIndex;
+ }
+ }
+ if (totalSubs) {
+ *totalSubs = numVars;
+ }
+ for (i = 0; i < numVars; i++) {
+ if (nassign[i] > 1) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
+ goto error;
+ } else if (!xpgSize && (nassign[i] == 0)) {
+ /*
+ * If the space is empty, and xpgSize is 0 (means XPG wasn't
+ * used, and/or numVars != 0), then too many vars were given
+ */
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers");
+ goto error;
+ }
+ }
+
+ if (nassign != staticAssign) {
+ efree((char *)nassign);
+ }
+ return SCAN_SUCCESS;
+
+badIndex:
+ if (gotXpg) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range");
+ } else {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers");
+ }
+
+error:
+ if (nassign != staticAssign) {
+ efree((char *)nassign);
+ }
+ return SCAN_ERROR_INVALID_FORMAT;
+#undef STATIC_LIST_SIZE
+}
+/* }}} */
+
/* {{{ php_sscanf_internal
* This is the internal function which does processing on behalf of
* both sscanf() and fscanf()
@@ -1253,8 +1717,682 @@ done:
}
/* }}} */
+/* {{{ php_u_sscanf_internal
+ * This is the internal function which does processing on behalf of
+ * both sscanf() and fscanf()
+ *
+ * parameters :
+ * string literal string to be processed
+ * format format string
+ * argCount total number of elements in the args array
+ * args arguments passed in from user function (f|s)scanf
+ * varStart offset (in args) of 1st variable passed in to (f|s)scanf
+ * return_value set with the results of the scan
+ */
+
+PHPAPI int php_u_sscanf_internal( UChar *string, UChar *format,
+ int argCount, zval ***args,
+ int varStart, zval **return_value TSRMLS_DC)
+{
+ int numVars, nconversions, totalVars = -1;
+ int i, value, result;
+ int objIndex;
+ UChar *end, *baseString;
+ zval **current;
+ char op = 0;
+ int base = 0;
+ int underflow = 0;
+ size_t width;
+ long (*fn)() = NULL;
+ UChar *ch, sch;
+ int flags;
+ UChar buf[64]; /* Temporary buffer to hold scanned
+ * number strings before they are
+ * passed to strtoul. */
+
+
+ /* do some sanity checking */
+ if ((varStart > argCount) || (varStart < 0)){
+ varStart = SCAN_MAX_ARGS + 1;
+ }
+ numVars = argCount - varStart;
+ if (numVars < 0) {
+ numVars = 0;
+ }
+
+#if 0
+ zend_printf("
in sscanf_internal :
string is \"%s\", format = \"%s\"
NumVars = %d. VarStart = %d
-------------------------
",
+ string, format, numVars, varStart);
+#endif
+ /*
+ * Check for errors in the format string.
+ */
+ if (u_ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
+ scan_set_error_return( numVars, return_value );
+ return SCAN_ERROR_INVALID_FORMAT;
+ }
+
+ objIndex = numVars ? varStart : 0;
+
+ /*
+ * If any variables are passed, make sure they are all passed by reference
+ */
+ if (numVars) {
+ for (i = varStart;i < argCount;i++){
+ if ( ! PZVAL_IS_REF( *args[ i ] ) ) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i);
+ scan_set_error_return(numVars, return_value);
+ return SCAN_ERROR_VAR_PASSED_BYVAL;
+ }
+ }
+ }
+
+
+ /*
+ * Allocate space for the result objects. Only happens when no variables
+ * are specified
+ */
+
+ if (!numVars) {
+ zval *tmp;
+
+ /* allocate an array for return */
+ array_init(*return_value);
+
+ for (i = 0; i < totalVars; i++) {
+ MAKE_STD_ZVAL(tmp);
+ ZVAL_NULL(tmp);
+ if (add_next_index_zval(*return_value, tmp) == FAILURE) {
+ scan_set_error_return(0, return_value);
+ return FAILURE;
+ }
+ }
+ }
+
+ baseString = string;
+
+ /*
+ * Iterate over the format string filling in the result objects until
+ * we reach the end of input, the end of the format string, or there
+ * is a mismatch.
+ */
+
+ nconversions = 0;
+ /* note ! - we need to limit the loop for objIndex to keep it in bounds */
+
+ while (*format != 0x00) {
+
+ ch = format++;
+
+ flags = 0;
+
+ /*
+ * If we see whitespace in the format, skip whitespace in the string.
+ */
+
+ if ( u_isspace(*ch) ) {
+ sch = *string;
+ while ( u_isspace(sch) ) {
+ if (*string == 0x00) {
+ goto done;
+ }
+ string++;
+ sch = *string;
+ }
+ continue;
+ }
+
+ if (*ch != 0x25 /* '%' */) {
+ literal:
+ if (*string == 0x00) {
+ underflow = 1;
+ goto done;
+ }
+ sch = *string;
+ string++;
+ if (*ch != sch) {
+ goto done;
+ }
+ continue;
+ }
+
+ ch = format++;
+ if (*ch == 0x25 /* '%' */) {
+ goto literal;
+ }
+
+ /*
+ * Check for assignment suppression ('*') or an XPG3-style
+ * assignment ('%n$').
+ */
+
+ if (*ch == 0x2A /* '*' */) {
+ flags |= SCAN_SUPPRESS;
+ ch = format++;
+ } else if ( u_isdigit(*ch)) {
+ value = zend_u_strtoul(format-1, &end, 10);
+ if (*end == 0x24 /* '$' */) {
+ format = end+1;
+ ch = format++;
+ objIndex = varStart + value - 1;
+ }
+ }
+
+ /*
+ * Parse any width specifier.
+ */
+
+ if ( u_isdigit(*ch)) {
+ width = zend_u_strtoul(format-1, &format, 10);
+ ch = format++;
+ } else {
+ width = 0;
+ }
+
+ /*
+ * Ignore size specifier.
+ */
+
+ if ((*ch == 0x6C /* 'l' */) || (*ch == 0x4C /* 'L' */) || (*ch == 0x68 /* 'h' */)) {
+ ch = format++;
+ }
+
+ /*
+ * Handle the various field types.
+ */
+
+ switch (*ch) {
+ case 0x6E /* 'n' */:
+ if (!(flags & SCAN_SUPPRESS)) {
+ if (numVars && objIndex >= argCount) {
+ break;
+ } else if (numVars) {
+ zend_uint refcount;
+
+ current = args[objIndex++];
+ refcount = (*current)->refcount;
+ zval_dtor( *current );
+ ZVAL_LONG( *current, (long)(string - baseString) );
+ (*current)->refcount = refcount;
+ (*current)->is_ref = 1;
+ } else {
+ add_index_long(*return_value, objIndex++, string - baseString);
+ }
+ }
+ nconversions++;
+ continue;
+
+ case 0x64 /* 'd' */:
+ case 0x44 /* 'D' */:
+ op = 'i';
+ base = 10;
+ fn = (long (*)())strtol;
+ break;
+ case 0x69 /* 'i' */:
+ op = 'i';
+ base = 0;
+ fn = (long (*)())strtol;
+ break;
+ case 0x6F /* 'o' */:
+ op = 'i';
+ base = 8;
+ fn = (long (*)())strtol;
+ break;
+ case 0x78 /* 'x' */:
+ case 0x58 /* 'X' */:
+ op = 'i';
+ base = 16;
+ fn = (long (*)())strtol;
+ break;
+ case 0x75 /* 'u' */:
+ op = 'i';
+ base = 10;
+ flags |= SCAN_UNSIGNED;
+ fn = (long (*)())zend_u_strtoul;
+ break;
+
+ case 0x66 /* 'f' */:
+ case 0x65 /* 'e' */:
+ case 0x45 /* 'E' */:
+ case 0x67 /* 'g' */:
+ op = 'f';
+ break;
+
+ case 0x73 /* 's' */:
+ op = 's';
+ break;
+
+ case 0x63 /* 'c' */:
+ op = 's';
+ flags |= SCAN_NOSKIP;
+ /*-cc-*/
+ if (0 == width) {
+ width = 1;
+ }
+ /*-cc-*/
+ break;
+ case 0x5B /* '[' */:
+ op = '[';
+ flags |= SCAN_NOSKIP;
+ break;
+ } /* switch */
+
+ /*
+ * At this point, we will need additional characters from the
+ * string to proceed.
+ */
+
+ if (*string == 0x00) {
+ underflow = 1;
+ goto done;
+ }
+
+ /*
+ * Skip any leading whitespace at the beginning of a field unless
+ * the format suppresses this behavior.
+ */
+
+ if (!(flags & SCAN_NOSKIP)) {
+ while (*string != 0x00) {
+ sch = *string;
+ if (! u_isspace(sch) ) {
+ break;
+ }
+ string++;
+ }
+ if (*string == 0x00) {
+ underflow = 1;
+ goto done;
+ }
+ }
+
+ /*
+ * Perform the requested scanning operation.
+ */
+
+ switch (op) {
+ case 'c':
+ case 's':
+ /*
+ * Scan a string up to width characters or whitespace.
+ */
+
+ if (width == 0) {
+ width = (size_t) ~0;
+ }
+ end = string;
+ while (*end != 0x00) {
+ sch = *end;
+ if ( u_isspace( sch ) ) {
+ break;
+ }
+ end++;
+ if (--width == 0) {
+ break;
+ }
+ }
+ if (!(flags & SCAN_SUPPRESS)) {
+ if (numVars && objIndex >= argCount) {
+ break;
+ } else if (numVars) {
+ zend_uint refcount;
+
+ current = args[objIndex++];
+ refcount = (*current)->refcount;
+ zval_dtor( *current );
+ ZVAL_UNICODEL( *current, string, end-string, 1);
+ (*current)->refcount = refcount;
+ (*current)->is_ref = 1;
+ } else {
+ add_index_unicodel( *return_value, objIndex++, string, end-string, 1);
+ }
+ }
+ string = end;
+ break;
+
+ case '[': {
+ u_CharSet cset;
+
+ if (width == 0) {
+ width = (size_t) ~0;
+ }
+ end = string;
+
+ format = u_BuildCharSet(&cset, format);
+ while (*end != 0x00) {
+ sch = *end;
+ if (!u_CharInSet(&cset, sch)) {
+ break;
+ }
+ end++;
+ if (--width == 0) {
+ break;
+ }
+ }
+ u_ReleaseCharSet(&cset);
+
+ if (string == end) {
+ /*
+ * Nothing matched the range, stop processing
+ */
+ goto done;
+ }
+ if (!(flags & SCAN_SUPPRESS)) {
+ if (numVars && objIndex >= argCount) {
+ break;
+ } else if (numVars) {
+ current = args[objIndex++];
+ zval_dtor( *current );
+ ZVAL_UNICODEL( *current, string, end-string, 1);
+ } else {
+ add_index_unicodel(*return_value, objIndex++, string, end-string, 1);
+ }
+ }
+ string = end;
+
+ break;
+ }
+ /*
+ case 'c':
+ / Scan a single character./
+
+ sch = *string;
+ string++;
+ if (!(flags & SCAN_SUPPRESS)) {
+ if (numVars) {
+ char __buf[2];
+ __buf[0] = sch;
+ __buf[1] = '\0';;
+ current = args[objIndex++];
+ convert_to_string_ex( current );
+ ZVAL_STRINGL( *current, __buf, 1, 1);
+ } else {
+ add_index_stringl(*return_value, objIndex++, &sch, 1, 1);
+ }
+ }
+ break;
+ */
+ case 'i':
+ /*
+ * Scan an unsigned or signed integer.
+ */
+
+ /*-cc-*/
+ buf[0] = 0x00;
+ /*-cc-*/
+ if ((width == 0) || (width > sizeof(buf) - 1)) {
+ width = sizeof(buf) - 1;
+ }
+
+ flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
+ for (end = buf; width > 0; width--) {
+ switch (*string) {
+ /*
+ * The 0 digit has special meaning at the beginning of
+ * a number. If we are unsure of the base, it
+ * indicates that we are in base 8 or base 16 (if it is
+ * followed by an 'x').
+ */
+ case 0x30 /* '0' */:
+ /*-cc-*/
+ if (base == 16) {
+ flags |= SCAN_XOK;
+ }
+ /*-cc-*/
+ if (base == 0) {
+ base = 8;
+ flags |= SCAN_XOK;
+ }
+ if (flags & SCAN_NOZERO) {
+ flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
+ } else {
+ flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
+ }
+ goto addToInt;
+
+ case 0x31 /* '1' */: case 0x32 /* '2' */: case 0x33 /* '3' */: case 0x34 /* '4' */:
+ case 0x35 /* '5' */: case 0x36 /* '6' */: case 0x37 /* '7' */:
+ if (base == 0) {
+ base = 10;
+ }
+ flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
+ goto addToInt;
+
+ case 0x38 /* '8' */: case 0x39 /* '9' */:
+ if (base == 0) {
+ base = 10;
+ }
+ if (base <= 8) {
+ break;
+ }
+ flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
+ goto addToInt;
+
+ case 0x41 /* 'A' */: case 0x42 /* 'B' */: case 0x43 /* 'C' */:
+ case 0x44 /* 'D' */: case 0x45 /* 'E' */: case 0x46 /* 'F' */:
+ case 0x61 /* 'a' */: case 0x62 /* 'b' */: case 0x63 /* 'c' */:
+ case 0x64 /* 'd' */: case 0x65 /* 'e' */: case 0x66 /* 'f' */:
+ if (base <= 10) {
+ break;
+ }
+ flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
+ goto addToInt;
+
+ case 0x2B /* '+' */: case 0x2D /* '-' */:
+ if (flags & SCAN_SIGNOK) {
+ flags &= ~SCAN_SIGNOK;
+ goto addToInt;
+ }
+ break;
+
+ case 0x78 /* 'x' */: case 0x58 /* 'X' */:
+ if ((flags & SCAN_XOK) && (end == buf+1)) {
+ base = 16;
+ flags &= ~SCAN_XOK;
+ goto addToInt;
+ }
+ break;
+ }
+
+ /*
+ * We got an illegal character so we are done accumulating.
+ */
+
+ break;
+
+ addToInt:
+ /*
+ * Add the character to the temporary buffer.
+ */
+ *end++ = *string++;
+ if (*string == 0x00) {
+ break;
+ }
+ }
+
+ /*
+ * Check to see if we need to back up because we only got a
+ * sign or a trailing x after a 0.
+ */
+
+ if (flags & SCAN_NODIGITS) {
+ if (*string == 0x00) {
+ underflow = 1;
+ }
+ goto done;
+ } else if (end[-1] == 0x78 /* 'x' */ || end[-1] == 0x58 /* 'X' */) {
+ end--;
+ string--;
+ }
+
+
+ /*
+ * Scan the value from the temporary buffer. If we are
+ * returning a large unsigned value, we have to convert it back
+ * to a string since PHP only supports signed values.
+ */
+
+ if (!(flags & SCAN_SUPPRESS)) {
+ *end = 0x00;
+ value = (int) (*fn)(buf, NULL, base);
+ if ((flags & SCAN_UNSIGNED) && (value < 0)) {
+ u_sprintf(buf, "%u", value); /* INTL: ISO digit */
+ if (numVars && objIndex >= argCount) {
+ break;
+ } else if (numVars) {
+ /* change passed value type to string */
+ current = args[objIndex++];
+ convert_to_unicode( *current );
+ ZVAL_UNICODE( *current, buf, 1 );
+ } else {
+ add_index_unicode(*return_value, objIndex++, buf, 1);
+ }
+ } else {
+ if (numVars && objIndex >= argCount) {
+ break;
+ } else if (numVars) {
+ current = args[objIndex++];
+ convert_to_long( *current );
+ Z_LVAL(**current) = value;
+ } else {
+ add_index_long(*return_value, objIndex++, value);
+ }
+ }
+ }
+
+ break;
+
+ case 'f':
+ /*
+ * Scan a floating point number
+ */
+ buf[0] = 0x00; /* call me pedantic */
+ if ((width == 0) || (width > sizeof(buf) - 1)) {
+ width = sizeof(buf) - 1;
+ }
+ flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
+ for (end = buf; width > 0; width--) {
+ switch (*string) {
+ case 0x30 /* '0' */: case 0x31 /* '1' */: case 0x32 /* '2' */: case 0x33 /* '3' */:
+ case 0x34 /* '4' */: case 0x35 /* '5' */: case 0x36 /* '6' */: case 0x37 /* '7' */:
+ case 0x38 /* '8' */: case 0x39 /* '9' */:
+ flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
+ goto addToFloat;
+ case 0x2B /* '+' */: case 0x2D /* '-' */:
+ if (flags & SCAN_SIGNOK) {
+ flags &= ~SCAN_SIGNOK;
+ goto addToFloat;
+ }
+ break;
+ case 0x2E /* '.' */:
+ if (flags & SCAN_PTOK) {
+ flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
+ goto addToFloat;
+ }
+ break;
+ case 0x65 /* 'e' */: case 0x45 /* 'E' */:
+ /*
+ * An exponent is not allowed until there has
+ * been at least one digit.
+ */
+
+ if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
+ flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
+ | SCAN_SIGNOK | SCAN_NODIGITS;
+ goto addToFloat;
+ }
+ break;
+ }
+
+ /*
+ * We got an illegal character so we are done accumulating.
+ */
+
+ break;
+
+ addToFloat:
+ /*
+ * Add the character to the temporary buffer.
+ */
+
+ *end++ = *string++;
+ if (*string == 0x00) {
+ break;
+ }
+ }
+
+ /*
+ * Check to see if we need to back up because we saw a
+ * trailing 'e' or sign.
+ */
+
+ if (flags & SCAN_NODIGITS) {
+ if (flags & SCAN_EXPOK) {
+ /*
+ * There were no digits at all so scanning has
+ * failed and we are done.
+ */
+ if (*string == 0x00) {
+ underflow = 1;
+ }
+ goto done;
+ }
+
+ /*
+ * We got a bad exponent ('e' and maybe a sign).
+ */
+
+ end--;
+ string--;
+ if (*end != 0x65 /* 'e' */ && *end != 0x45 /* 'E' */) {
+ end--;
+ string--;
+ }
+ }
+
+ /*
+ * Scan the value from the temporary buffer.
+ */
+
+ if (!(flags & SCAN_SUPPRESS)) {
+ double dvalue;
+ *end = 0x00;
+ dvalue = zend_u_strtod(buf, NULL);
+ if (numVars && objIndex >= argCount) {
+ break;
+ } else if (numVars) {
+ current = args[objIndex++];
+ convert_to_double( *current );
+ Z_DVAL_PP( current ) = dvalue;
+ } else {
+ add_index_double( *return_value, objIndex++, dvalue );
+ }
+ }
+ break;
+ } /* switch (op) */
+ nconversions++;
+ } /* while (*format != '\0') */
+
+done:
+ result = SCAN_SUCCESS;
+
+ if (underflow && (0==nconversions)) {
+ scan_set_error_return( numVars, return_value );
+ result = SCAN_ERROR_EOF;
+ } else if (numVars) {
+ convert_to_long( *return_value );
+ Z_LVAL_PP(return_value) = nconversions;
+ } else if (nconversions < totalVars) {
+ /* to do : not all elements converted. we need to prune the list - cc
+ */
+ }
+
+ return result;
+}
+/* }}} */
+
/* the compiler choked when i tried to make this a macro */
-static inline void scan_set_error_return(int numVars, zval **return_value)
+static inline void scan_set_error_return(int numVars, zval **return_value) /* {{{ */
{
if (numVars) {
Z_TYPE_PP(return_value) = IS_LONG;
@@ -1265,7 +2403,7 @@ static inline void scan_set_error_return
convert_to_null( *return_value );
}
}
-
+/* }}} */
/*
* Local variables:
Index: ext/standard/scanf.h
===================================================================
RCS file: /repository/php-src/ext/standard/scanf.h,v
retrieving revision 1.16
diff -u -p -d -r1.16 scanf.h
--- ext/standard/scanf.h 1 Jan 2006 13:09:55 -0000 1.16
+++ ext/standard/scanf.h 25 Dec 2006 14:12:38 -0000
@@ -43,5 +43,7 @@
*/
PHPAPI int ValidateFormat(char *format, int numVars, int *totalVars);
PHPAPI int php_sscanf_internal(char *string, char *format, int argCount, zval ***args, int varStart, zval **return_value TSRMLS_DC);
+PHPAPI int u_ValidateFormat(UChar *format, int numVars, int *totalVars);
+PHPAPI int php_u_sscanf_internal(UChar *string, UChar *format, int argCount, zval ***args, int varStart, zval **return_value TSRMLS_DC);
#endif /* SCANF_H */
Index: ext/standard/string.c
===================================================================
RCS file: /repository/php-src/ext/standard/string.c,v
retrieving revision 1.627
diff -u -p -d -r1.627 string.c
--- ext/standard/string.c 21 Dec 2006 21:47:56 -0000 1.627
+++ ext/standard/string.c 25 Dec 2006 14:12:39 -0000
@@ -7346,13 +7346,23 @@ PHP_FUNCTION(sscanf)
WRONG_PARAM_COUNT;
}
- convert_to_string_ex(args[0]);
- convert_to_string_ex(args[1]);
+ if (UG(unicode)) {
+ convert_to_unicode_ex(args[0]);
+ convert_to_unicode_ex(args[1]);
- result = php_sscanf_internal(Z_STRVAL_PP(args[0]),
- Z_STRVAL_PP(args[1]),
- argc, args,
- 2, &return_value TSRMLS_CC);
+ result = php_u_sscanf_internal(Z_USTRVAL_PP(args[0]),
+ Z_USTRVAL_PP(args[1]),
+ argc, args,
+ 2, &return_value TSRMLS_CC);
+ } else {
+ convert_to_string_ex(args[0]);
+ convert_to_string_ex(args[1]);
+
+ result = php_sscanf_internal(Z_STRVAL_PP(args[0]),
+ Z_STRVAL_PP(args[1]),
+ argc, args,
+ 2, &return_value TSRMLS_CC);
+ }
efree(args);
if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
Index: ext/standard/file.c
===================================================================
RCS file: /repository/php-src/ext/standard/file.c,v
retrieving revision 1.479
diff -u -p -d -r1.479 file.c
--- ext/standard/file.c 21 Dec 2006 00:00:11 -0000 1.479
+++ ext/standard/file.c 25 Dec 2006 14:12:39 -0000
@@ -1246,16 +1246,15 @@ PHPAPI PHP_FUNCTION(fgetss)
}
/* }}} */
-/* {{{ proto mixed fscanf(resource stream, string format [, string ...])
+/* {{{ proto mixed fscanf(resource stream, string format [, string ...]) U
Implements a mostly ANSI compatible fscanf() */
-/* UTODO: Accept unicode contents */
PHP_FUNCTION(fscanf)
{
int result;
zval **file_handle, **format_string;
- size_t len;
int type;
char *buf;
+ UChar *u_buf;
void *what;
zval ***args;
@@ -1287,19 +1286,31 @@ PHP_FUNCTION(fscanf)
RETURN_FALSE;
}
+ if (UG(unicode)) {
+ u_buf = php_stream_u_get_line((php_stream *) what, NULL_ZSTR, 0, 0, NULL TSRMLS_CC);
+ if (u_buf == NULL) {
+ efree(args);
+ RETURN_FALSE;
+ }
- buf = php_stream_get_line((php_stream *) what, NULL_ZSTR, 0, &len);
- if (buf == NULL) {
- efree(args);
- RETURN_FALSE;
- }
+ convert_to_unicode_ex(format_string);
+ result = php_u_sscanf_internal(u_buf, Z_USTRVAL_PP(format_string),
+ argCount, args, 2, &return_value TSRMLS_CC);
+ efree(u_buf);
+ } else {
+ buf = php_stream_get_line((php_stream *) what, NULL_ZSTR, 0, NULL);
+ if (buf == NULL) {
+ efree(args);
+ RETURN_FALSE;
+ }
- convert_to_string_ex(format_string);
- result = php_sscanf_internal(buf, Z_STRVAL_PP(format_string),
- argCount, args, 2, &return_value TSRMLS_CC);
+ convert_to_string_ex(format_string);
+ result = php_sscanf_internal(buf, Z_STRVAL_PP(format_string),
+ argCount, args, 2, &return_value TSRMLS_CC);
+ efree(buf);
+ }
efree(args);
- efree(buf);
if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
WRONG_PARAM_COUNT;
Index: Zend/zend_strtol.c
===================================================================
RCS file: /repository/ZendEngine2/zend_strtol.c,v
retrieving revision 1.4
diff -u -p -d -r1.4 zend_strtol.c
--- Zend/zend_strtol.c 7 Dec 2006 18:56:25 -0000 1.4
+++ Zend/zend_strtol.c 25 Dec 2006 14:12:39 -0000
@@ -31,6 +31,7 @@
* SUCH DAMAGE.
*/
+#include
#include
#include
#include
@@ -39,13 +40,12 @@
#include
#include
-/*
+/* long zend_u_strtol (const UChar *nptr, UChar **endptr, int base) {{{
* Convert a Unicode string to a long integer.
*
* Ignores `locale' stuff.
*/
-long
-zend_u_strtol(nptr, endptr, base)
+ZEND_API long zend_u_strtol(nptr, endptr, base)
const UChar *nptr;
UChar **endptr;
register int base;
@@ -56,6 +56,14 @@ zend_u_strtol(nptr, endptr, base)
register unsigned long cutoff;
register int neg = 0, any, cutlim;
+ if (s == NULL) {
+ errno = ERANGE;
+ if (endptr != NULL) {
+ *endptr = NULL;
+ }
+ return 0;
+ }
+
/*
* Skip white space and pick up leading +/- sign if any.
* If base is 0, allow 0x for hex and 0 for octal, else
@@ -128,3 +136,109 @@ zend_u_strtol(nptr, endptr, base)
*endptr = (UChar *)(any ? s - 1 : nptr);
return (acc);
}
+/* }}} */
+
+/* unsigned long zend_u_strtoul (const UChar *nptr, UChar **endptr, int base) {{{
+ * Convert a Unicode string to a unsigned long integer.
+ *
+ * Ignores `locale' stuff.
+ */
+ZEND_API unsigned long zend_u_strtoul(nptr, endptr, base)
+ const UChar *nptr;
+ UChar **endptr;
+ register int base;
+{
+ register const UChar *s = nptr;
+ register unsigned long acc;
+ register UChar c;
+ register unsigned long cutoff;
+ register int neg = 0, any, cutlim;
+
+ if (s == NULL) {
+ errno = ERANGE;
+ if (endptr != NULL) {
+ *endptr = NULL;
+ }
+ return 0;
+ }
+
+ /*
+ * Skip white space and pick up leading +/- sign if any.
+ * If base is 0, allow 0x for hex and 0 for octal, else
+ * assume decimal; if base is already 16, allow 0x.
+ */
+ do {
+ c = *s++;
+ } while (u_isspace(c));
+ if (c == 0x2D /*'-'*/) {
+ neg = 1;
+ c = *s++;
+ } else if (c == 0x2B /*'+'*/)
+ c = *s++;
+ if ((base == 0 || base == 16) &&
+ (c == 0x30 /*'0'*/)
+ && (*s == 0x78 /*'x'*/ || *s == 0x58 /*'X'*/)) {
+ c = s[1];
+ s += 2;
+ base = 16;
+ }
+ if (base == 0)
+ base = (c == 0x30 /*'0'*/) ? 8 : 10;
+
+ /*
+ * Compute the cutoff value between legal numbers and illegal
+ * numbers. That is the largest legal value, divided by the
+ * base. An input number that is greater than this value, if
+ * followed by a legal input character, is too big. One that
+ * is equal to this value may be valid or not; the limit
+ * between valid and invalid numbers is then based on the last
+ * digit. For instance, if the range for longs is
+ * [-2147483648..2147483647] and the input base is 10,
+ * cutoff will be set to 214748364 and cutlim to either
+ * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
+ * a value > 214748364, or equal but the next digit is > 7 (or 8),
+ * the number is too big, and we will return a range error.
+ *
+ * Set any if any `digits' consumed; make it negative to indicate
+ * overflow.
+ */
+ cutoff = (unsigned long)ULONG_MAX / (unsigned long)base;
+ cutlim = (unsigned long)ULONG_MAX % (unsigned long)base;
+ for (acc = 0, any = 0;; c = *s++) {
+ if (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/)
+ c -= 0x30 /*'0'*/;
+ else if (c >= 0x41 /*'A'*/ && c <= 0x5A /*'Z'*/)
+ c -= 0x41 /*'A'*/ - 10;
+ else if (c >= 0x61 /*'a'*/ && c <= 0x7A /*'z'*/)
+ c -= 0x61 /*'a'*/ - 10;
+ else
+ break;
+ if (c >= base)
+ break;
+
+ if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
+ any = -1;
+ else {
+ any = 1;
+ acc *= base;
+ acc += c;
+ }
+ }
+ if (any < 0) {
+ acc = ULONG_MAX;
+ errno = ERANGE;
+ } else if (neg)
+ acc = -acc;
+ if (endptr != NULL)
+ *endptr = (UChar *)(any ? s - 1 : nptr);
+ return (acc);
+}
+/* }}} */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * indent-tabs-mode: t
+ * End:
+ */
Index: Zend/zend_operators.h
===================================================================
RCS file: /repository/ZendEngine2/zend_operators.h,v
retrieving revision 1.116
diff -u -p -d -r1.116 zend_operators.h
--- Zend/zend_operators.h 20 Dec 2006 19:08:23 -0000 1.116
+++ Zend/zend_operators.h 25 Dec 2006 14:12:39 -0000
@@ -65,6 +65,7 @@ ZEND_API int is_smaller_or_equal_functio
ZEND_API zend_bool instanceof_function_ex(zend_class_entry *instance_ce, zend_class_entry *ce, zend_bool interfaces_only TSRMLS_DC);
ZEND_API zend_bool instanceof_function(zend_class_entry *instance_ce, zend_class_entry *ce TSRMLS_DC);
ZEND_API long zend_u_strtol(const UChar *nptr, UChar **endptr, int base);
+ZEND_API unsigned long zend_u_strtoul(const UChar *nptr, UChar **endptr, int base);
ZEND_API double zend_u_strtod(const UChar *nptr, UChar **endptr);
END_EXTERN_C()
Index: main/php_streams.h
===================================================================
RCS file: /repository/php-src/main/php_streams.h,v
retrieving revision 1.118
diff -u -p -d -r1.118 php_streams.h
--- main/php_streams.h 9 Nov 2006 01:06:45 -0000 1.118
+++ main/php_streams.h 25 Dec 2006 14:12:39 -0000
@@ -335,7 +335,7 @@ PHPAPI UChar *php_stream_get_record_unic
PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t *pmax_bytes, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC);
-#define php_stream_u_get_line(stream, buf, maxlen_buf, maxlen_chars, buf_type) _php_stream_u_get_line((stream), (buf), (maxlen_buf), (maxlen_chars), (buf_type) TSRMLS_CC)
+#define php_stream_u_get_line(stream, buf, maxlen_buf, maxlen_chars, retlen) _php_stream_get_line((stream), IS_UNICODE, (buf), (maxlen_buf), (maxlen_chars), (retlen) TSRMLS_CC)
/* CAREFUL! this is equivalent to puts NOT fputs! */
PHPAPI int _php_stream_puts(php_stream *stream, char *buf TSRMLS_DC);