Ask a Question related to UNIX Programming, Design and Development.
-
Lee Tracey #1
Errno 25 ENOTTY then SIGSEGV, socket on accept call? Help.
Hi All,
I have been scouring newsgroups for ages now trying to find a solution
to my problem. Have to resort to posting the question:
I am writing a basic server/multiple client program primarily in c,
utilising TCP sockets (all necessary code below - except some of the
client code).
The problem I have is that the server SIGSEGV's when "accept()" is
called.
I notice that errno is -1 into accept() and 25 out of it.
This is the first time I have attempted this, It was working
temperementally before, but now not at all. It always collapses.
I noticed that it used to only fall when a "job" was launched on
certain machines, from certain machines. But now all the time.
Can anyone spot what the problem is?
Sorry for the large amount of code, I felt is was best to post all of
it, for completeness...
Thanks Lee Tracey. All help much appreciated.
================================================== ============================
Some code notes:
1) 61ci27nvrun_sections_ is the starting point, with all arguments OK.
2) Although 61ci27nvreset_ is run prior to this to open a socket and
set
re-usability
3) I've only included the server code here. The client calls
61ci_get_active_socket()
When connect in that routine, the server comes out of poll and goes
into
accept where it seg violates and has errno 25 just before hand.
================================================== ============================
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <netinet/in.h>
#include <netdb.h>
#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <stropts.h>
#include <signal.h>
#include <poll.h>
#include <sys/select.h>
#include <61ci/buffer.h>
#define FALSE 0
#define TRUE 1
#define HOSTNAMELEN 20
#define MAXMACHINES 64
#define MAXJOBS 100
#define min(A,B) ( (A) < (B) ? (A) : (B) )
#define max(A,B) ( (A) > (B) ? (A) : (B) )
#define MAXMSGLEN 10008
extern void (**jb56_signal_handlers) ( int );
static char wait_flag = FALSE;
struct pollfd fds[MAXMACHINES+1];
int jobstatus[MAXJOBS], jobsection[MAXJOBS], jobhost[MAXJOBS],
jobpid[MAXJOBS];
int numjobs, numhosts;
unsigned short int 30cs_port;
int 30cs_sock;
char remhostname[MAXMACHINES][HOSTNAMELEN];
char *solver[] = {"27nv","Q253"};
int hoststatus[MAXMACHINES];
/* Controlling integers definition */
/* jobstatus : one for each job needed to run */
/* : 0= no job */
/* : 1= awaiting run */
/* : 1= job launched */
/* : 3= connected to remote machne */
/* : 4= completed */
/* : 5= error */
/* jobsection : the stream section number that */
/* is being run */
/* jobhost : the host no. job is running on */
/* numjobs : the unmber of jobs in total */
/* numhosts : the number of hosts in total */
/* remhostname : array of remote host names */
/* hoststatus : array of status's for hosts */
/* : 0= no machine available */
/* : 1= available */
/* : 2= in use */
#define JOBNONE 0
#define JOBAWAITING 1
#define JOBLAUNCHED 2
#define JOBCONNECTED 3
#define JOBCOMPLETED 4
#define JOBERROR 5
#define HOSTNOTAVAIL 0
#define HOSTAVAIL 1
#define HOSTINUSE 2
void 61ci27nvloadprog ( int , int , int *, int * );
void 61ci27nvtidy ( void );
void 61ci27nvreset_ ( int *iret )
{
int i, i_reuse_addr;
const char reuse_addr='1';
for (i=0; i<=MAXMACHINES; i++)
{
strcpy(remhostname[i], "");
hoststatus[i] = HOSTNOTAVAIL;
}
for (i=0; i<=MAXJOBS; i++)
{
jobstatus[i] = JOBNONE;
jobpid[i] = 0;
}
numhosts=0;
numjobs=0;
/* Open a socket here... */
if ( 61ci_get_passive_socket ( &30cs_port, &30cs_sock ) < 0 ){
*iret = 999;
return;
}
/* Set up socket so we can re-bind to it without TIME_WAIT problems
*/
setsockopt (30cs_sock, SOL_SOCKET, SO_REUSEADDR, &reuse_addr,
sizeof(reuse_addr));
}
int 61ci27nvgetnumavail ( void )
{
int i,n;
for (i=0,n=0; i<=MAXMACHINES; i++)
if (hoststatus[i]==HOSTAVAIL) n++;
return(n);
}
int 61ci27nvgetnumok ( void )
{
int i,n;
for (i=0,n=0; i<=MAXMACHINES; i++)
if (hoststatus[i]==HOSTAVAIL || hoststatus[i]==HOSTINUSE) n++;
return(n);
}
int 61ci27nvjobsongo ( void )
{
int numjobsongo, i;
numjobsongo = 0;
for (i=0; i<=MAXJOBS; i++)
{
if (jobstatus[i]==JOBCONNECTED || jobstatus[i]==JOBLAUNCHED)
numjobsongo++;
}
return(numjobsongo);
}
int 61ci27nvgetmachine ( int last )
{
int i;
for(i=last; i<=MAXMACHINES; i++)
if (hoststatus[i]==HOSTAVAIL)
return(i);
return(-1);
}
int 61ci27nvgetnextjob( void )
{
int i;
for (i=0; i<=MAXJOBS; i++)
if (jobstatus[i]==JOBAWAITING)
return(i);
return(-1);
}
void 61ci27nv_kill_job( i )
{
int cmdlen;
char *cmdlin;
if (jobstatus[i] == JOBCONNECTED && jobpid[i] > 0)
/* can only do this for "connected" m/c's as */
/* I don't have PID for "launched machines yet */
{
cmdlen = strlen(remhostname[jobhost[i]])+21;
cmdlin = (char *) calloc ( (size_t) cmdlen, sizeof(char));
sprintf ( cmdlin, "/bin/rsh %s \"kill -9 %i\"",
remhostname[jobhost[i]], jobpid[i] );
printf("Killing stream section %i on %s\n", jobsection[i],
remhostname[jobhost[i]]);
system(cmdlin);
free(cmdlin);
jobstatus[i] = JOBNONE;
close(fds[i].fd);
fds[i].fd=0;
}
}
void 61ci27nvkill_remaining_jobs( void )
{
int i, cmdlen;
/* Loop through jobs being processed and kill them off */
for (i=0; i<MAXJOBS; i++)
{
if (jobstatus[i] == JOBCONNECTED) /* can only do this for
"connected" m/c's as */
/* I don't have PID for "launched machines yet */
61ci27nv_kill_job( i );
}
61ci27nvtidy();
}
void 61ci27nvtidy( void )
{
/* close socket here */
close(30cs_sock);
}
void 61ci27nvrun_sections_ (int *ind,int *nob,int *sec,int *n1s,int
*geom,
int *n1g,int *aero, int *n1a,int *n2a,int *bc,
int *n1b,int *prof,int *n1p,int *n2p,int *npttotarr,
int *nptprarr,int *intSolver,int *iret)
{
int timeout, pollval, errno, i, length, iLoop, 61ciloop;
int 61ci_buflen, bufpart, ierroccurred,numhostok,irun;
int numhostavail,nextjob,intmachine,last,j,ishutdown,n umjobsongo;
char *commbuf;
struct sockaddr_in server;
/* loop until all jobs are completed or we run out of machines due
to errors or timeouts! */
for (iLoop=TRUE; iLoop;){
/* ========= LAUNCHING LOOP ========= */
/* ========= LAUNCHING LOOP ========= */
/* ========= LAUNCHING LOOP ========= */
/* Get number of machines available */
numhostavail = 61ci27nvgetnumavail();
numhostok = 61ci27nvgetnumok();
if (numhostok<=0)
{
printf("There are no hosts available to run any jobs because of
previous errors. Exiting.\n");
*iret = 999;
break;
}
/* Run appropriate number of sections */
for (i=0; i<min(numhostavail,numjobs); i++)
{
last = 0;
intmachine = 61ci27nvgetmachine ( last );
if (intmachine==-1)
break;
nextjob = 61ci27nvgetnextjob();
if (nextjob==-1)
break;
printf("Launching %s, stream section %i on %s...",
solver[*intSolver-1], jobsection[nextjob],
remhostname[intmachine]);
irun=2;
j=nextjob+1,61ci27nvsetdata_( &irun,ind,&j,nob,sec,n1s,geom,
n1g,aero,n1a,n2a,bc,n1b,&numjobs,prof,n1p,n2p,nptt otarr,nptprarr,iret);
61ci27nvloadprog( nextjob,intmachine,ind,iret );
if (*iret == 0)
printf("launched.\n");
else
{
printf("failed. re-queueing.\n");
*iret=0;
/* remove machine from list and reset job status */
hoststatus[intmachine] = HOSTNOTAVAIL;
jobstatus[nextjob] = JOBAWAITING;
61ci_delete_bl_files_( &j, ind );
}
}
/* Get out of infinite loop now */
if (61ci27nvjobsongo()>0)
{
/* ========= WAIT FOR RESPONSE ========= */
/* ========= WAIT FOR RESPONSE ========= */
/* ========= WAIT FOR RESPONSE ========= */
timeout = 5*60*1000; /* 5 minutes */
pollval = poll ( fds, (unsigned long) MAXMACHINES, timeout );
*iret=999;
if ( pollval < 0 ) {
printf("Error %i while waiting for response. Exiting.\n",errno);
iLoop=FALSE;
61ci27nvkill_remaining_jobs();
return;
}
else if ( pollval == 0 ){
printf("Connection timed out, no response for %d seconds.
Exiting.\n",timeout/1000);
iLoop=FALSE;
61ci27nvkill_remaining_jobs();
return;
}
else {
*iret=0;
/* loop through the revents and determines which processe(s) have
returned stuff */
for (i=0; i<MAXMACHINES; i++)
{
ierroccurred = 0;
if ((fds[i].revents & POLLERR) || (fds[i].revents & POLLHUP))
{
/* there was an error: handle this */
printf("Section %i terminated => re-queing\n",jobsection[i]);
jobstatus[i] = JOBAWAITING; /* Set status to re-queue */
hoststatus[jobhost[i]] = HOSTNOTAVAIL;
}
else
{
if (fds[i].revents != 0){
/* recieve data from 27nv */
61ci_buflen = 61ci27nv_recv( fds[i].fd, &ierroccurred );
if ( ierroccurred != 0 )
{
61ci27nv_kill_job( i );
fds[i].fd=0;
ierroccurred = 1;
jobstatus[i] = JOBAWAITING; /* Set status to re-queue */
hoststatus[jobhost[i]] = HOSTNOTAVAIL;
}
else
{
/* check sum */
if (61ci_buflen != 61ci_27nvbuf_.buffer[0].d){
jobstatus[i] = JOBAWAITING; /* Set status to re-queue */
hoststatus[jobhost[i]] = HOSTNOTAVAIL;
printf("Checksum error with data from stream section %i -
re-queing.\n",
jobsection[i]);
/* close socket connection here */
fds[i].fd=0;
}
commbuf = &61ci_27nvbuf_.buffer[1].c;
if ( (j=strncmp(commbuf,"OK",2))==0)
{
printf("Stream section %i confirmed
running...\n",jobsection[i]);
jobstatus[i] = JOBCONNECTED;
hoststatus[jobhost[i]] = HOSTINUSE;
jobpid[i] = 61ci_27nvbuf_.buffer[2].i;
}
if ( (j=strncmp(commbuf,"DONE",4))==0)
{
/* TODO: Get 27nv data here... */
61ci_getheaders_(&fds[i].fd, iret);
if (*iret != 0)
{
printf("Stream section %i error getting return data -
re-queueing.\n",
jobsection[i]);
jobstatus[i] = JOBAWAITING; /* Set status to re-queue */
hoststatus[jobhost[i]] = HOSTNOTAVAIL;
61ci27nv_kill_job( i );
/* close socket connection here */
fds[i].fd=0;
}
else {
/* set up commons here from 27nvFB (in load module) */
irun=3;
j=i+1,61ci27nvsetdata_( &irun,ind,&j,nob,sec,n1s,geom,
n1g,aero,n1a,n2a,bc,n1b,&numjobs,prof,n1p,n2p,nptt otarr,
nptprarr,iret);
printf("Stream section %i completed.\n",jobsection[i]);
jobstatus[i] = JOBCOMPLETED;
hoststatus[jobhost[i]] = HOSTAVAIL;
/* close socket connection here */
close(fds[i].fd);
fds[i].fd=0;
}
}
if( (j=strncmp(commbuf,"ERROR",5))==0)
{
printf("Stream section %i reported an
error...\n",jobsection[i]);
jobstatus[i] = JOBERROR;
hoststatus[jobhost[i]] = HOSTAVAIL;
/* close socket connection here */
fds[i].fd=0;
}
}
}
}
fds[i].revents = 0;
}
}
/* Get number of awaiting jobs to be complete/awaiting run */
numjobs=0;
for (i=0; i<=MAXJOBS; i++)
{
if (jobstatus[i]==JOBAWAITING || jobstatus[i]==JOBCONNECTED ||
jobstatus[i]==JOBLAUNCHED)
numjobs++;
}
/* Get out of infinite loop now */
if (numjobs==0)
{
iLoop=FALSE;
printf("All jobs completed.\n");
}
}
}
61ci27nvtidy();
}
void 61ci27nvloadprog ( int job, int inthost, int *index, int *iret )
{
int cmdlen,i,timeout,flag,nc;
char *hostname,*cmdlin,*cwd;
char runsh1[] = "/bin/rsh "; /* add remote host here */
char runsh2[] = " \"setenv BLAY_PORT "; /* Add hostname_portnumber
here */
char runsh3[] = " ; cd ";
char runsh4[] = " ; ";
char runsh5[] = "sun4_SunOS_5.5.1/bin/blay_load \" > /dev/null 2>
/dev/null &";
char pluginname[] = "JB56";
char pluginpath[501] = "";
if (*iret!=0)
return;
/* set up environment var to xfer port number and hostname of this
m/c */
hostname = (char *) calloc ( (size_t) HOSTNAMELEN, sizeof(char));
cwd = getcwd(NULL,64);
gethostname ( hostname, (size_t) HOSTNAMELEN );
cmdlen = strlen(runsh1)+strlen(runsh2)+strlen(runsh3)+strle n(runsh4)+strlen(runsh5)+strlen(hostname)+
strlen(remhostname[inthost])+5+strlen(cwd);
cmdlin = (char *) calloc ( (size_t) cmdlen, sizeof(char));
if ( (30cs_port<=0) || (strlen(remhostname[inthost]) <= 0) ) {
*iret = 997;
return;
}
jw16_get_path_of_plugin_(pluginpath, pluginname, iret, 500, 4);
sczi3_(pluginpath, &nc, 500);
pluginpath[nc]=(char)0;
sprintf ( cmdlin, "%s%s%s%s_%04d%s%s%s%s%s", runsh1,
remhostname[inthost], runsh2,
hostname, 30cs_port, runsh3, cwd, runsh4, pluginpath, runsh5 );
printf(cmdlin);
system(cmdlin);
free(cmdlin);
/* wait for the remote job to connect to this m/c */
timeout = 15; /* seconds */
fds[job].fd = 61ci_wait_connect ( 30cs_sock, timeout );
if (fds[job].fd <= 0) {
*iret=998;
return;
}
fds[job].events = POLLIN | POLLPRI | POLLERR | POLLHUP | POLLNVAL |
POLLRDNORM | POLLRDBAND ;
61ci_27nvbuf_.fd = fds[job].fd;
jobstatus[job] = JOBLAUNCHED; /* Set status to launched */
jobhost[job] = inthost;
hoststatus[jobhost[job]] = HOSTINUSE; /* Set status to in use */
/* Cool, now, we are connected */
/* Send header data and a signal to load module to run 27nv */
61ci_sendheaders_(&fds[job].fd, iret);
if (*iret != 0)
*iret = 999;
sleep(1);
sprintf ( &61ci_27nvbuf_.buffer[0].c, "DATA " );
61ci_27nvbuf_.buffer[1].i = job+1;
61ci_27nvbuf_.buffer[2].i= *index;
61ci_27nvbuf_.lenbuf = 3;
flag = 61ci27nv_send_();
}
/* ================================================== =============================
*
* ================================================== =============================
*
* ================================================== =============================
*
*
*
* This routine is called when "any" exception signal is trapped,
"SIGCHLD" may *
* indicate that 30cs has terminated, a "CHECK" request is sent and if
an error or *
* an empty buffer is received the file descriptor is set to "-1" ( =
"unset" ). *
* Other signals indicate that JB16 is about to terminate, the routine
therefore *
* kills jobs. In both cases signal trapping is discontinued, control
is passed on *
* to any previously declared signal handler.
*
*
*
*================================================= ================================*/
void jb56_signal_handler ( int signl)
{
int n;
int nsignals;
nsignals = SIGCANCEL + 1;
if (61ci27nvjobsongo() == 0)
return;
if ( signl == SIGINT ) {
printf ("Interrupt intercepted, killing all launched boundary
layer jobs...\n");
61ci27nvkill_remaining_jobs();
}
if ( signl == SIGABRT ||
signl == SIGFPE ||
signl == SIGILL ||
signl == SIGSEGV ||
signl == SIGTERM ||
signl == SIGBUS ){
for (n=1; n<nsignals; n++) signal( n, SIG_DFL );
printf ("WARNING: JB56 terminated abnormally...\n");
61ci27nvkill_remaining_jobs();
raise (signl);
}
else if ( jb56_signal_handlers[signl] != (void (*)(int)) NULL )
(*jb56_signal_handlers[signl])(signl);
}
A different file now...
/* ================================================== ======================================
*
*
*
* VERSION DATE WHO WHY
*
* 1.0.0.0 L.Tracey New routine -> 30cs-27nv port
*
*
*
*================================================= =========================================*
* This file contains a set of basic communication routines
*
*
*
* 61ci_get_passive_socket -> Creates a "socket" for connection to
another program. *
*
*
* 61ci_wait_connect -> Wait for a connection on the identified socket
*
*
*
* 61ci_get_active_socket -> Creates a "socket" and attempts to use it
to connect to *
* the another program (i.e. from 27nv
to 30cs) *
*
*
* Procedure: Program "A" creates a passive socket by calling
"get_passive_socket" with *
* both arguments set to zero. It passes the
port number returned *
* and the current host name to program B, it
then calls *
* "wait_connect" on the socket to wait for
program "B" to respond. *
* (NB: "wait_connect" terminates if program B
doesn't respond *
* within a preset time interval.)
*
*
*
* Program "B" creates an active socket by calling
"get_active_socket" passing *
* the port number and hostname given it by
program "A" (this *
* connects program "B" to program "A")
*
*
*
* At this point the programs can communicate via send/recv
instructions, *
* program "A" uses the file descriptor returned by
"wait_connect" and program *
* "B" the "*sock" value returned by "get_active_socket"
*
*
*
*================================================= =========================================*/
#define MAIN_ROUTINE
#define SUN
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <netinet/in.h>
#include <netdb.h>
#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#ifdef SUN
#include <stropts.h>
#endif
#include <poll.h>
#define FALSE 0
#define TRUE 1
#define HOSTNAMELEN 20
#define MAXMSGLEN 10008
#include <61ci/buffer.h>
extern int h_errno;
/* ----------------------------------------------------------------------------------------
*/
int 61ci_get_passive_socket ( unsigned short int *port, int *sock )
{
int length;
struct sockaddr_in server;
if ((*sock = socket(AF_INET, SOCK_STREAM, 0)) < 0){
printf("\nget_passive_socket -> socket call failed,
error=%d",errno); fflush ( NULL );
return(-4);
}
server.sin_family = AF_INET;
server.sin_addr.s_addr = htonl(INADDR_ANY);
server.sin_port = htons(*port);
if (bind(*sock, (struct sockaddr *) &server, sizeof(server)) < 0){
close(*sock);
fprintf(stderr, "\nget_passive_socket -> bind call failed,
error=%d",errno);
fflush ( stderr );
return(-5);
}
length = sizeof(server);
if (getsockname(*sock, (struct sockaddr *) &server, &length)){
close(*sock);
fprintf(stderr, "\nget_passive_socket -> getsockname call failed,
error=%d",errno);
fflush (stderr);
return(-6);
}
*port=ntohs(server.sin_port);
if ( listen(*sock, 5) == -1 ) {
fprintf(stderr, "\nget_passive_socket -> listen failed with error
%d", errno );
fflush ( stderr );
return(-7);
}
return(0);
}
/* ----------------------------------------------------------------------------------------
*/
int 61ci_wait_connect ( int sock, int timeout )
{
char debug = FALSE;
struct sockaddr_in server;
struct pollfd fds[1];
int pollval,fd,length;
debug = (sock < 0);
sock = abs(sock);
fds[0].fd = sock;
fds[0].events = POLLIN | POLLPRI | POLLERR | POLLHUP | POLLNVAL |
POLLRDNORM | POLLRDBAND ;
if ( debug ) {
printf("########################################## ############################\n");
printf("########################################## ############################\n");
printf("########################################## ############################\n");
printf("WARNING: wait connect running in DEBUG mode, no timeout
on connection!\n");
printf("WARNING: wait connect running in DEBUG mode, no timeout
on connection!\n");
printf("WARNING: wait connect running in DEBUG mode, no timeout
on connection!\n");
printf("WARNING: wait connect running in DEBUG mode, no timeout
on connection!\n");
printf("WARNING: wait connect running in DEBUG mode, no timeout
on connection!\n");
printf("########################################## ############################\n");
printf("########################################## ############################\n");
printf("########################################## ############################\n");
pollval = poll ( fds, (unsigned long) 1, -1 );
}
else {
if ( timeout <= 0 ) timeout = 60000;
else timeout = 1000*timeout;
pollval = poll ( fds, (unsigned long) 1, timeout ); /* A reply
is expected within "timeout" milliseconds */
}
if ( pollval < 0 ) {
if ( errno == EINTR ) printf("Wait for connection terminated by
user interrupt.\n");
else printf("Error %d while waiting for
response\n",errno);
fd=-1;
}
else if ( pollval == 0 ){
printf("Connection timed out, no response for %d
seconds\n",timeout/1000);
fd=-1;
}
else {
length = sizeof(server);
if ((fd = accept(sock, (struct sockaddr *) &server, &length)) ==
-1){
printf( "\nwait_connect -> accept failed with error %d",
errno ); fflush ( NULL );
return(-8);
}
}
return (fd);
}
/* ----------------------------------------------------------------------------------------
*/
int 61ci_get_active_socket ( unsigned short int port, char *hostname,
int *sock )
{
static char host_name[HOSTNAMELEN];
struct sockaddr_in client;
struct hostent *hp, *gethostbyname();
if ( strcmp ( hostname, "" ) == 0 ) gethostname ( host_name,
(size_t) HOSTNAMELEN );
else strcpy ( host_name, hostname );
if ((*sock = socket(AF_INET, SOCK_STREAM, 0)) < 0){
printf("\nget_active_socket -> socket call failed,
error=%d",errno); fflush ( NULL );
return(-1);
}
client.sin_family = AF_INET;
if ((hp = gethostbyname(host_name)) == (struct hostent *) NULL){
close(*sock);
printf("\nget_active_socket -> gethostbyname call failed,
error=%d",h_errno); fflush ( NULL );
return(-2);
}
bcopy(hp->h_addr, &client.sin_addr, hp->h_length);
client.sin_port = htons(port);
if (connect(*sock, (struct sockaddr *) &client, sizeof(client)) <
0){
close(*sock);
printf("\nget_active_socket -> connect failed with error = %d",
errno ); fflush ( NULL );
return(-3);
}
return(0);
}
================================================== ============================
Lee Tracey Guest
-
unknown sigsegv code 0
Recently (yesterday) everytime I launch Adobe Acrobat 9 Pro, OR Adobe Acrobat Reader 9 - my system becomes unresponsive and hangs. A single CPU (1... -
Socket.accept problem via Socket.for_fd($stdin.fileno)
Hi, I am experiencing a rather infuriating problem with Socket.accept on Windows XP. The problem exists when I try to create a Socket from... -
SIGSEGV in memcpy() when starting a n application
Hello, I am getting SIGSEGV immedaitely in the initialization of my app. by GDB I see that it happens in memcpy() when starting a n application ... -
How to ignore SIGSEGV signal
hi there, how to ignore SIGSEGV signal, 'cause i tried to handle or ignore that signal on the code but it keep raised signal. is this one-shot... -
Socket State Before Call To Send()?
I am having troublee determining if a socket is still connected to the remote host before I make a call to send(). I know how to determine this... -
Lee Tracey #2
Re: Errno 25 ENOTTY then SIGSEGV, socket on accept call? Help.
Oh yes, forgot to say: I'm running only 1 "server" process via a Sun
Ultra 30 on sun4_SunOS_5.5.1 .
Thanks again for any tips you people might have.
Lee Tracey
Lee Tracey Guest
-
Jens.Toerring@physik.fu-berlin.de #3
Re: Errno 25 ENOTTY then SIGSEGV, socket on accept call? Help.
Lee Tracey <NOSPAM@leetracey.com> wrote:
How do you check errno after calling accept() when the call of> The problem I have is that the server SIGSEGV's when "accept()" is
> called.
> I notice that errno is -1 into accept() and 25 out of it.
accept() is segfaulting?
Your chances of getting helpful answers would be much higher if you> This is the first time I have attempted this, It was working
> Sorry for the large amount of code, I felt is was best to post all of
> it, for completeness...
would trim your program to the smallest version that still exhibits
the problem (you might even find the cause of the segfault yourself
in the process).
I didn't want to wade through 800 lines of strangly formatted code,
so instead I just checked the places where you do memory allocation
and found that in both cases you have some problems which might or
might not be the reason for the segfault you get (SIGSEGVs usually
being related to these kinds of problems).
And please post compiable programs. Your program contains several
invalid variable and function names. They never may start with a
digit, only letters (and possibly also an underscore, but you better
avoid this in order not to invade the implemantations namespace).
What's that? That are never a valid variable names. How did you manage> unsigned short int 30cs_port;
> int 30cs_sock;
to get the compiler not to give up immediately at these lines?
Invalid function name. Your compiler should tell you so.> void 61ci27nv_kill_job( i )
> {
> int cmdlen;
> char *cmdlin;Undefined behavior: you probably didn't allocate enough memory> if (jobstatus[i] == JOBCONNECTED && jobpid[i] > 0)
> /* can only do this for "connected" m/c's as */
> /* I don't have PID for "launched machines yet */
> {
> cmdlen = strlen(remhostname[jobhost[i]])+21;
> cmdlin = (char *) calloc ( (size_t) cmdlen, sizeof(char));
> sprintf ( cmdlin, "/bin/rsh %s \"kill -9 %i\"",
> remhostname[jobhost[i]], jobpid[i] );
for cmdlin, forgetting that the PID also requires some space in
the string. BTW, casting the return value of malloc(), calloc()
etc. isn't recommended - you don't win anything doing it but
just keep the compiler from complaining if you forgot to include
<stdlib.h>. And sizeof(char) is 1 per defitionem. Should you try
to stay on the save side in case you ever should switch to e.g.
wide chars then use something like "sizeof *cmdlin" instead.
> void 61ci27nvloadprog ( int job, int inthost, int *index, int *iret )
> {
> int cmdlen,i,timeout,flag,nc;
> char *hostname,*cmdlin,*cwd;Calling getcwd() with a NULL pointer as the first argument is an> char runsh1[] = "/bin/rsh "; /* add remote host here */
> char runsh2[] = " \"setenv BLAY_PORT "; /* Add hostname_portnumber
> here */
> char runsh3[] = " ; cd ";
> char runsh4[] = " ; ";
> char runsh5[] = "sun4_SunOS_5.5.1/bin/blay_load \" > /dev/null 2>
> /dev/null &";
> char pluginname[] = "JB56";
> char pluginpath[501] = "";
>
> if (*iret!=0)
> return;
>
> /* set up environment var to xfer port number and hostname of this
> m/c */
> hostname = (char *) calloc ( (size_t) HOSTNAMELEN, sizeof(char));
> cwd = getcwd(NULL,64);
extension that is only supported on some implementations and thus
should be avoided for sake of portability. And if you use it you
have to deallocate the memory the function has allocated for you
or you create a memory leak.
If the host name is longer than HOSTNAMELEN it's unspecified if the> gethostname ( hostname, (size_t) HOSTNAMELEN );
returned name is null-terminated. If it isn't 'hostname' isn't some-
thing that you can use as a string (as you do later in your program).
And you allow only 20 chars for HOSTNAMELEN which might be too short.
Why don't you use HOST_NAME_MAX + 1, which is guaranteed to be the
longest string required for a hostname (HOST_NAME_MAX can be determined
via the sysconf() function)?
> cmdlen = strlen(runsh1)+strlen(runsh2)+strlen(runsh3)+strle n(runsh4)+strlen(runsh5)+strlen(hostname)+
> strlen(remhostname[inthost])+5+strlen(cwd);
> cmdlin = (char *) calloc ( (size_t) cmdlen, sizeof(char));Again you probably don't allocate enough memory for cmdlin, so everything> if ( (30cs_port<=0) || (strlen(remhostname[inthost]) <= 0) ) {
> *iret = 997;
> return;
> }
> jw16_get_path_of_plugin_(pluginpath, pluginname, iret, 500, 4);
> sczi3_(pluginpath, &nc, 500);
> pluginpath[nc]=(char)0;
> sprintf ( cmdlin, "%s%s%s%s_%04d%s%s%s%s%s", runsh1,
> remhostname[inthost], runsh2,
> hostname, 30cs_port, runsh3, cwd, runsh4, pluginpath, runsh5 );
can happen after this statement has been executed.
Invalid function names. No self-respecting C compiler will accept them.> int 61ci_get_passive_socket ( unsigned short int *port, int *sock )
> int 61ci_wait_connect ( int sock, int timeout )
> int 61ci_get_active_socket ( unsigned short int port, char *hostname,
Regards, Jens
--
_ _____ _____
| ||_ _||_ _| [email]Jens.Toerring@physik.fu-berlin.de[/email]
_ | | | | | |
| |_| | | | | | [url]http://www.physik.fu-berlin.de/~toerring[/url]
\___/ens|_|homs|_|oerring
Jens.Toerring@physik.fu-berlin.de Guest
-
Lee Tracey #4
Re: Fixed
Jens.
I owe you a very big thank you. You spurred a point of enquiry based
on something I read earlier on in the day. The variable cmdlen was too
short as you suggested.
Regarding the variable/function names; I had to change them from what
they really are when posting to the public (the nature of my
employers' business), and apologise about that. I've posted some
comments about your questions below.
And another thank you.
Best regards
Lee Tracey
[email]Jens.Toerring@physik.fu-berlin.de[/email] wrote in message news:<be3l4j$tlll$1@fu-berlin.de>...I had attached with workshop/dbk and was able to view any variable at> Lee Tracey <NOSPAM@leetracey.com> wrote:>> > The problem I have is that the server SIGSEGV's when "accept()" is
> > called.
> > I notice that errno is -1 into accept() and 25 out of it.
> How do you check errno after calling accept() when the call of
> accept() is segfaulting?
segmentation time.
Thank you for pointing this out. I am recently new to c and will take>>> > This is the first time I have attempted this, It was working
> > Sorry for the large amount of code, I felt is was best to post all of
> > it, for completeness...
> Your chances of getting helpful answers would be much higher if you
> would trim your program to the smallest version that still exhibits
> the problem (you might even find the cause of the segfault yourself
> in the process).
>
> I didn't want to wade through 800 lines of strangly formatted code,
> so instead I just checked the places where you do memory allocation
> and found that in both cases you have some problems which might or
> might not be the reason for the segfault you get (SIGSEGVs usually
> being related to these kinds of problems).
>
> And please post compiable programs. Your program contains several
> invalid variable and function names. They never may start with a
> digit, only letters (and possibly also an underscore, but you better
> avoid this in order not to invade the implemantations namespace).
>>> > unsigned short int 30cs_port;
> > int 30cs_sock;
> What's that? That are never a valid variable names. How did you manage
> to get the compiler not to give up immediately at these lines?
>>> > void 61ci27nv_kill_job( i )
> Invalid function name. Your compiler should tell you so.
>>> > {
> > int cmdlen;
> > char *cmdlin;>> > if (jobstatus[i] == JOBCONNECTED && jobpid[i] > 0)
> > /* can only do this for "connected" m/c's as */
> > /* I don't have PID for "launched machines yet */
> > {
> > cmdlen = strlen(remhostname[jobhost[i]])+21;
> > cmdlin = (char *) calloc ( (size_t) cmdlen, sizeof(char));
> > sprintf ( cmdlin, "/bin/rsh %s \"kill -9 %i\"",
> > remhostname[jobhost[i]], jobpid[i] );
> Undefined behavior: you probably didn't allocate enough memory
> for cmdlin, forgetting that the PID also requires some space in
> the string. BTW, casting the return value of malloc(), calloc()
> etc. isn't recommended - you don't win anything doing it but
> just keep the compiler from complaining if you forgot to include
> <stdlib.h>. And sizeof(char) is 1 per defitionem. Should you try
> to stay on the save side in case you ever should switch to e.g.
> wide chars then use something like "sizeof *cmdlin" instead.
this advice on board.
I was unaware it was implementation specific: I'll look into improving>>> > void 61ci27nvloadprog ( int job, int inthost, int *index, int *iret )
> > {
> > int cmdlen,i,timeout,flag,nc;
> > char *hostname,*cmdlin,*cwd;>> > char runsh1[] = "/bin/rsh "; /* add remote host here */
> > char runsh2[] = " \"setenv BLAY_PORT "; /* Add hostname_portnumber
> > here */
> > char runsh3[] = " ; cd ";
> > char runsh4[] = " ; ";
> > char runsh5[] = "sun4_SunOS_5.5.1/bin/blay_load \" > /dev/null 2>
> > /dev/null &";
> > char pluginname[] = "JB56";
> > char pluginpath[501] = "";
> >
> > if (*iret!=0)
> > return;
> >
> > /* set up environment var to xfer port number and hostname of this
> > m/c */
> > hostname = (char *) calloc ( (size_t) HOSTNAMELEN, sizeof(char));
> > cwd = getcwd(NULL,64);
> Calling getcwd() with a NULL pointer as the first argument is an
> extension that is only supported on some implementations and thus
> should be avoided for sake of portability. And if you use it you
> have to deallocate the memory the function has allocated for you
> or you create a memory leak.
this.
Again, well spotted.>>> > gethostname ( hostname, (size_t) HOSTNAMELEN );
> If the host name is longer than HOSTNAMELEN it's unspecified if the
> returned name is null-terminated. If it isn't 'hostname' isn't some-
> thing that you can use as a string (as you do later in your program).
> And you allow only 20 chars for HOSTNAMELEN which might be too short.
> Why don't you use HOST_NAME_MAX + 1, which is guaranteed to be the
> longest string required for a hostname (HOST_NAME_MAX can be determined
> via the sysconf() function)?
This is where the _real_ problem was. I owe you for mentioning this.>>> > cmdlen = strlen(runsh1)+strlen(runsh2)+strlen(runsh3)+strle n(runsh4)+strlen(runsh5)+strlen(hostname)+
> > strlen(remhostname[inthost])+5+strlen(cwd);
> > cmdlin = (char *) calloc ( (size_t) cmdlen, sizeof(char));>> > if ( (30cs_port<=0) || (strlen(remhostname[inthost]) <= 0) ) {
> > *iret = 997;
> > return;
> > }
> > jw16_get_path_of_plugin_(pluginpath, pluginname, iret, 500, 4);
> > sczi3_(pluginpath, &nc, 500);
> > pluginpath[nc]=(char)0;
> > sprintf ( cmdlin, "%s%s%s%s_%04d%s%s%s%s%s", runsh1,
> > remhostname[inthost], runsh2,
> > hostname, 30cs_port, runsh3, cwd, runsh4, pluginpath, runsh5 );
> Again you probably don't allocate enough memory for cmdlin, so everything
> can happen after this statement has been executed.
>>> > int 61ci_get_passive_socket ( unsigned short int *port, int *sock )
> > int 61ci_wait_connect ( int sock, int timeout )
> > int 61ci_get_active_socket ( unsigned short int port, char *hostname,
> Invalid function names. No self-respecting C compiler will accept them.
>
> Regards, JensLee Tracey Guest



Reply With Quote

